pcsx2: Implemented Threaded VU1 :D

Threading VU1 took a lot of rewrites and new code to make possible (MTGS, microVU, gifUnit...), but we finally got to the point where it was feasible, and now we've done it! (so now everyone can stop complaining that pcsx2 only takes advantage of 2 cores :p).

The speedups in the games that benefit from it are great if you have a CPU with 3+ cores (generally a 10~45% speedup); however, games that are GS-limited can see a slowdown (especially on dual-core CPUs).

The option can be found in the speedhacks section as "MTVU (Multi-Threaded microVU1)". And when enabled it should show the VU thread-time percentage on the title bar window (like we currently do for the EE/GS/UI threads).

It is listed as a speedhack because in order for threading VU1 to be a speedup, we need to assume that games will not send gif packets containing Signal/Finish/Label commands from path 1 (vu1's xgkick). The good news is that very few games ever do this, so the compatibility of MTVU is very high (a game that does do this will likely hang).

Note: vs2010 builds and Linux builds need to be updated to include "MTVU.h" and "MTVU.cpp".


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4865 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2011-08-12 02:31:49 +00:00
parent 60cec5a9b0
commit ac9bf45f98
60 changed files with 1186 additions and 434 deletions

View File

@ -342,8 +342,8 @@ protected:
struct _EXCEPTION_POINTERS;
extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps);
# define PCSX2_PAGEFAULT_PROTECT __try
# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {}
# define PCSX2_PAGEFAULT_PROTECT __try
# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {}
#else
# error PCSX2 - Unsupported operating system platform.
@ -352,5 +352,7 @@ extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps);
extern void pxInstallSignalHandler();
extern void _platform_InstallSignalHandler();
#include "Threading.h"
extern SrcType_PageFault* Source_PageFault;
extern Threading::Mutex PageFault_Mutex;

View File

@ -179,17 +179,20 @@ namespace Threading
// from these little beasties! (these are all implemented internally using cross-platform
// implementations of _InterlockedExchange and such)
extern u32 AtomicRead( volatile u32& Target );
extern s32 AtomicRead( volatile s32& Target );
extern u32 AtomicExchange( volatile u32& Target, u32 value );
extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value );
extern u32 AtomicIncrement( volatile u32& Target );
extern u32 AtomicDecrement( volatile u32& Target );
extern s32 AtomicExchange( volatile s32& Target, s32 value );
extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value );
extern s32 AtomicExchangeAdd( volatile s32& Target, s32 value );
extern s32 AtomicExchangeSub( volatile s32& Target, s32 value );
extern u32 AtomicIncrement( volatile u32& Target );
extern s32 AtomicIncrement( volatile s32& Target );
extern u32 AtomicDecrement( volatile u32& Target );
extern s32 AtomicDecrement( volatile s32& Target );
extern bool AtomicBitTestAndReset( volatile u32& bitset, u8 bit );
extern bool AtomicBitTestAndReset( volatile s32& bitset, u8 bit );
extern void* _AtomicExchangePointer( volatile uptr& target, uptr value );
extern void* _AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand );
@ -393,5 +396,34 @@ namespace Threading
bool Failed() const { return !m_IsLocked; }
};
// --------------------------------------------------------------------------------------
// ScopedLockBool
// --------------------------------------------------------------------------------------
// A ScopedLock in which you specify an external bool to get updated on locks/unlocks.
// Note that the isLockedBool should only be used as an indicator for the locked status,
// and not actually depended on for thread synchronization...
// RAII helper that couples a ScopedLock with an externally supplied bool flag
// which mirrors the lock's held/released state.  The flag is only an
// *indicator* of the locked status and must not be relied upon for actual
// thread synchronization.
struct ScopedLockBool
{
	ScopedLock m_lock;                   // underlying scoped mutex lock
	volatile __aligned(4) bool& m_bool;  // external flag mirroring the lock state

	ScopedLockBool(Mutex& mutexToLock, volatile __aligned(4) bool& isLockedBool)
		: m_lock(mutexToLock)
		, m_bool(isLockedBool)
	{
		m_bool = m_lock.IsLocked();
	}

	virtual ~ScopedLockBool() throw()
	{
		m_bool = false;
	}

	// Re-acquires the mutex, then raises the indicator flag.
	void Acquire()
	{
		m_lock.Acquire();
		m_bool = m_lock.IsLocked();
	}

	// Lowers the indicator flag, then releases the mutex.
	void Release()
	{
		m_bool = false;
		m_lock.Release();
	}
};
}

View File

@ -35,10 +35,12 @@ enum XMMSSEType
// as a project option. The multithreaded emitter relies on native compiler support for
// TLS -- Macs are crap out of luck there (for now).
#include "Utilities/Threading.h"
#ifndef x86EMIT_MULTITHREADED
# define x86EMIT_MULTITHREADED 0
#else
# if !PCSX2_THREAD_LOCAL
# if PCSX2_THREAD_LOCAL
# define x86EMIT_MULTITHREADED 1
# else
// No TLS support? Force-clear the MT flag:
# pragma message("x86emitter: TLS not available, multithreaded emitter disabled.")
# undef x86EMIT_MULTITHREADED

View File

@ -46,6 +46,12 @@ static void SysPageFaultSignalFilter( int signal, siginfo_t *siginfo, void * )
// Note: Use of stdio functions isn't safe here. Avoid console logs,
// assertions, file logs, or just about anything else useful.
// Note: This signal can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
Threading::ScopedLock lock(PageFault_Mutex);
Source_PageFault->Dispatch( PageFaultInfo( (uptr)siginfo->si_addr & ~m_pagemask ) );
// resumes execution right where we left off (re-executes instruction that

View File

@ -786,72 +786,70 @@ void Threading::WaitEvent::Wait()
// InterlockedExchanges / AtomicExchanges (PCSX2's Helper versions)
// --------------------------------------------------------------------------------------
// define some overloads for InterlockedExchanges for commonly used types, like u32 and s32.
// Note: For all of these atomic operations below to be atomic, the variables need to be 4-byte
// aligned. Read: http://msdn.microsoft.com/en-us/library/ms684122%28v=vs.85%29.aspx
__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit )
{
// Plain atomic load: relies on naturally-aligned 32-bit reads being atomic
// (see the 4-byte alignment note at the top of this section).
__fi u32 Threading::AtomicRead(volatile u32& Target) {
return Target; // Properly-aligned 32-bit reads are atomic
}
// Signed variant of the atomic load above.
__fi s32 Threading::AtomicRead(volatile s32& Target) {
return Target; // Properly-aligned 32-bit reads are atomic
}
// Atomically clears 'bit' in 'bitset'; returns true if the bit was previously set.
__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) {
return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0;
}
// Signed variant of the atomic bit-test-and-reset above.
__fi bool Threading::AtomicBitTestAndReset( volatile s32& bitset, u8 bit ) {
return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0;
}
__fi u32 Threading::AtomicExchange( volatile u32& Target, u32 value )
{
// Atomically stores 'value' into 'Target', returning the previous value.
__fi u32 Threading::AtomicExchange(volatile u32& Target, u32 value ) {
return _InterlockedExchange( (volatile long*)&Target, value );
}
// Signed variant of the atomic exchange above.
__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) {
return _InterlockedExchange( (volatile long*)&Target, value );
}
__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value )
{
// Atomically adds 'value' to 'Target', returning the value prior to the add.
__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
}
// Signed variant of the atomic exchange-add above.
__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
}
__fi u32 Threading::AtomicIncrement( volatile u32& Target )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
}
__fi u32 Threading::AtomicDecrement( volatile u32& Target )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, -1 );
}
__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value )
{
return _InterlockedExchange( (volatile long*)&Target, value );
}
__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
}
__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value )
{
// Atomically subtracts 'value' from 'Target' (implemented as an add of the
// negation), returning the value prior to the subtraction.
__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, -value );
}
__fi s32 Threading::AtomicIncrement( volatile s32& Target )
{
// Atomically increments 'Target', returning the pre-increment value.
__fi u32 Threading::AtomicIncrement( volatile u32& Target ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
}
// Signed variant of the atomic increment above.
__fi s32 Threading::AtomicIncrement( volatile s32& Target) {
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
}
__fi s32 Threading::AtomicDecrement( volatile s32& Target )
{
// Atomically decrements 'Target', returning the pre-decrement value.
__fi u32 Threading::AtomicDecrement( volatile u32& Target ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, -1 );
}
// Signed variant of the atomic decrement above.
__fi s32 Threading::AtomicDecrement(volatile s32& Target) {
return _InterlockedExchangeAdd((volatile long*)&Target, -1);
}
// Atomically swaps 'value' into 'target', returning the previous pointer value.
// Uses the 64-bit intrinsic on x64 builds and the 32-bit one elsewhere.
// (Fix: the diff residue left both the old and new copies of the signature and
// return statements in place; the duplicates are removed here.)
__fi void* Threading::_AtomicExchangePointer(volatile uptr& target, uptr value)
{
#ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place.
	return (void*)_InterlockedExchange64(&(volatile s64&)target, value);
#else
	return (void*)_InterlockedExchange((volatile long*)&target, value);
#endif
}
// Atomically stores 'value' into 'target' only if 'target' currently equals
// 'comparand'; returns the previous pointer value either way.
// Fix: the x64 branch called _InterlockedCompareExchange64 with only two
// arguments -- the intrinsic takes (Destination, Exchange, Comparand), so the
// comparand was missing.  (Duplicated diff-residue lines removed as well.)
__fi void* Threading::_AtomicCompareExchangePointer(volatile uptr& target, uptr value, uptr comparand)
{
#ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place.
	return (void*)_InterlockedCompareExchange64(&(volatile s64&)target, value, comparand);
#else
	return (void*)_InterlockedCompareExchange((volatile long*)&target, value, comparand);
#endif
}

View File

@ -26,11 +26,11 @@
template class EventSource< IEventListener_PageFault >;
SrcType_PageFault* Source_PageFault = NULL;
Threading::Mutex PageFault_Mutex;
void pxInstallSignalHandler()
{
if (!Source_PageFault)
{
if(!Source_PageFault) {
Source_PageFault = new SrcType_PageFault();
}

View File

@ -25,6 +25,10 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
if( eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION )
return EXCEPTION_CONTINUE_SEARCH;
// Note: This exception can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
Threading::ScopedLock lock(PageFault_Mutex);
Source_PageFault->Dispatch( PageFaultInfo( (uptr)eps->ExceptionRecord->ExceptionInformation[1] ) );
return Source_PageFault->WasHandled() ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH;
}

View File

@ -377,7 +377,8 @@ struct Pcsx2Config
IntcStat :1, // tells Pcsx2 to fast-forward through intc_stat waits.
WaitLoop :1, // enables constant loop detection and fast-forwarding
vuFlagHack :1, // microVU specific flag hack
vuBlockHack :1; // microVU specific block flag no-propagation hack
vuBlockHack :1, // microVU specific block flag no-propagation hack
vuThread :1; // Enable Threaded VU1
BITFIELD_END
u8 EECycleRate; // EE cycle rate selector (1.0, 1.5, 2.0)
@ -471,6 +472,7 @@ TraceLogFilters& SetTraceConfig();
// ------------ CPU / Recompiler Options ---------------
#define THREAD_VU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1 && EmuConfig.Speedhacks.vuThread)
#define CHECK_MICROVU0 (EmuConfig.Cpu.Recompiler.UseMicroVU0)
#define CHECK_MICROVU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1)
#define CHECK_EEREC (EmuConfig.Cpu.Recompiler.EnableEE && GetCpuProviders().IsRecAvailable_EE())

View File

@ -17,9 +17,8 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "GS.h"
#include "Gif_Unit.h"
#include "Vif.h"
#include "Vif_Dma.h"
#include "IPU/IPU.h"

View File

@ -19,7 +19,6 @@
#include <list>
#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Counters.h"

View File

@ -245,6 +245,7 @@ enum MTGS_RingCommand
, GS_RINGTYPE_MODECHANGE // for issued mode changes.
, GS_RINGTYPE_CRC
, GS_RINGTYPE_GSPACKET
, GS_RINGTYPE_MTVU_GSPACKET
};
@ -263,8 +264,8 @@ class SysMtgsThread : public SysThreadBase
public:
// note: when m_ReadPos == m_WritePos, the fifo is empty
uint m_ReadPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
__aligned(4) uint m_ReadPos; // cur pos gs is reading from
__aligned(4) uint m_WritePos; // cur pos ee thread is writing to
volatile bool m_RingBufferIsBusy;
volatile u32 m_SignalRingEnable;
@ -273,7 +274,9 @@ public:
volatile s32 m_QueuedFrameCount;
volatile u32 m_VsyncSignalListener;
Mutex m_mtx_RingBufferBusy;
Mutex m_mtx_RingBufferBusy; // Is obtained while processing ring-buffer data
Mutex m_mtx_RingBufferBusy2; // This one gets released on semaXGkick waiting...
Mutex m_mtx_WaitGS;
Semaphore m_sem_OnRingReset;
Semaphore m_sem_Vsync;
@ -304,8 +307,7 @@ public:
virtual ~SysMtgsThread() throw();
// Waits for the GS to empty out the entire ring buffer contents.
// Used primarily for plugin startup/shutdown.
void WaitGS();
void WaitGS(bool syncRegs=true, bool weakWait=false, bool isMTVU=false);
void ResetGS();
void PrepDataPacket( MTGS_RingCommand cmd, u32 size );

View File

@ -17,7 +17,6 @@
#include "Common.h"
#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Vif_Dma.h"
@ -87,6 +86,7 @@ __fi void gifInterrupt()
}
static u32 WRITERING_DMA(u32 *pMem, u32 qwc) {
//qwc = min(qwc, 1024u);
uint size = gifUnit.TransferGSPacketData(GIF_TRANS_DMA, (u8*)pMem, qwc*16) / 16;
incGifChAddr(size);
return size;

View File

@ -35,15 +35,17 @@ enum GIF_PATH {
enum GIF_TRANSFER_TYPE {
GIF_TRANS_INVALID = 0x000, // Invalid
GIF_TRANS_XGKICK = 0x100, // Path 1
GIF_TRANS_DIRECT = 0x201, // Path 2
GIF_TRANS_DIRECTHL = 0x301, // Path 2
GIF_TRANS_DMA = 0x402, // Path 3
GIF_TRANS_FIFO = 0x502 // Path 3
GIF_TRANS_MTVU = 0x200, // Path 1
GIF_TRANS_DIRECT = 0x301, // Path 2
GIF_TRANS_DIRECTHL = 0x401, // Path 2
GIF_TRANS_DMA = 0x502, // Path 3
GIF_TRANS_FIFO = 0x602 // Path 3
};
static const char Gif_TransferStr[6][32] = {
static const char Gif_TransferStr[7][32] = {
"Invalid Transfer Type",
"GIF_TRANS_XGKICK",
"GIF_TRANS_MTVU",
"GIF_TRANS_DIRECT",
"GIF_TRANS_DIRECTHL",
"GIF_TRANS_DMA",

View File

@ -15,7 +15,6 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Gif.h"
#include "Gif_Unit.h"
#define GIF_PARSE DevCon.WriteLn

View File

@ -19,6 +19,7 @@
#include "GS.h"
#include "Gif_Unit.h"
#include "Vif_Dma.h"
#include "MTVU.h"
Gif_Unit gifUnit;
@ -76,12 +77,32 @@ bool Gif_HandlerAD(u8* pMem) {
return false;
}
// Returns true if pcsx2 needed to process the packet...
// Debug-build helper: inspects an A+D write (register number read from byte 8
// of the qword pair) and reports registers that would have required pcsx2-side
// handling; warns on writes to unknown registers.
bool Gif_HandlerAD_Debug(u8* pMem) {
	u32 reg = pMem[8];
	switch (reg) {
		case 0x50: Console.Error("GIF Handler Debug - BITBLTBUF"); return 1;
		case 0x52: Console.Error("GIF Handler Debug - TRXREG");    return 1;
		case 0x53: Console.Error("GIF Handler Debug - TRXDIR");    return 1;
		case 0x60: Console.Error("GIF Handler Debug - SIGNAL");    return 1;
		case 0x61: Console.Error("GIF Handler Debug - FINISH");    return 1;
		case 0x62: Console.Error("GIF Handler Debug - LABEL");     return 1;
		default:
			if (reg >= 0x63 && reg != 0x7f)
				DevCon.Warning("GIF Handler Debug - Write to unknown register! [reg=%x]", reg);
			return 0;
	}
}
// Fires the GS irq when a FINISH event is pending (CSR.FINISH set) and the
// FINISH interrupt is not masked in GSIMR (bit 9, mask 0x200).
void Gif_FinishIRQ() {
	if (!CSRreg.FINISH) return; // no FINISH event pending
	if (GSIMR & 0x200)  return; // FINISH irq is masked
	gsIrq();
}
// Used in MTVU mode... MTVU will later complete a real packet
// Queues a placeholder MTVU-path-1 command on the MTGS ring (offset/size 0).
// Note: the gsPack argument is not read here -- the real packet data is
// produced by the MTVU thread and fetched by MTGS when the command executes.
void Gif_AddGSPacketMTVU(GS_Packet& gsPack, GIF_PATH path) {
GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_MTVU_GSPACKET, 0, 0, path);
}
void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) {
//DevCon.WriteLn("Adding Completed Gif Packet [size=%x]", gsPack.size);
if (COPY_GS_PACKET_TO_MTGS) {
@ -91,6 +112,7 @@ void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) {
GetMTGS().SendDataPacket();
}
else {
pxAssertDev(!gsPack.readAmount, "Gif Unit - gsPack.readAmount only valid for MTVU path 1!");
AtomicExchangeAdd(gifUnit.gifPath[path].readAmount, gsPack.size);
GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_GSPACKET, gsPack.offset, gsPack.size, path);
}
@ -102,35 +124,47 @@ void Gif_AddBlankGSPacket(u32 size, GIF_PATH path) {
GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_GSPACKET, ~0u, size, path);
}
void Gif_MTGS_Wait() {
GetMTGS().WaitGS();
}
void Gif_Execute() {
gifUnit.Execute();
// Blocks until MTGS catches up.  Calls WaitGS with syncRegs=false and
// weakWait=true, so when isMTVU is set the wait may end as soon as MTGS has
// finished (or is pending on) a path-1 packet rather than draining the ring.
void Gif_MTGS_Wait(bool isMTVU) {
GetMTGS().WaitGS(false, true, isMTVU);
}
void SaveStateBase::gifPathFreeze(u32 path) {
Gif_Path& gifPath = gifUnit.gifPath[path];
pxAssertDev(gifPath.readAmount==0, "Gif Path readAmount should be 0!");
pxAssertDev(!gifPath.readAmount, "Gif Path readAmount should be 0!");
pxAssertDev(!gifPath.gsPack.readAmount, "GS Pack readAmount should be 0!");
pxAssertDev(!gifPath.GetPendingGSPackets(), "MTVU GS Pack Queue should be 0!");
if (IsSaving()) { // Move all the buffered data to the start of buffer
gifPath.RealignPacket(); // May add readAmount which we need to clear on load
}
u8* bufferPtr = gifPath.buffer; // Backup current buffer ptr
Freeze(gifPath);
Freeze(gifPath.mtvu.fakePackets);
FreezeMem(&gifPath, sizeof(gifPath) - sizeof(gifPath.mtvu));
FreezeMem(bufferPtr, gifPath.curSize);
gifPath.buffer = bufferPtr;
if (!IsSaving()) gifPath.readAmount = 0;
if(!IsSaving()) {
gifPath.readAmount = 0;
gifPath.gsPack.readAmount = 0;
}
}
void SaveStateBase::gifFreeze() {
Gif_MTGS_Wait();
bool mtvuMode = THREAD_VU1;
pxAssert(vu1Thread.IsDone());
GetMTGS().WaitGS();
FreezeTag("Gif Unit");
Freeze(mtvuMode);
Freeze(gifUnit.stat);
Freeze(gifUnit.gsSIGNAL);
Freeze(gifUnit.lastTranType);
gifPathFreeze(GIF_PATH_1);
gifPathFreeze(GIF_PATH_2);
gifPathFreeze(GIF_PATH_3);
if (!IsSaving()) {
if (mtvuMode != THREAD_VU1) {
DevCon.Warning("gifUnit: MTVU Mode has switched between save/load state");
// ToDo: gifUnit.SwitchMTVU(mtvuMode);
}
}
}

View File

@ -14,11 +14,16 @@
*/
#pragma once
#include <deque>
#include "System/SysThreads.h"
#include "Gif.h"
struct GS_Packet;
extern void Gif_MTGS_Wait();
extern void Gif_MTGS_Wait(bool isMTVU);
extern void Gif_FinishIRQ();
extern bool Gif_HandlerAD(u8* pMem);
extern bool Gif_HandlerAD_Debug(u8* pMem);
extern void Gif_AddBlankGSPacket(u32 size, GIF_PATH path);
extern void Gif_AddGSPacketMTVU (GS_Packet& gsPack, GIF_PATH path);
extern void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path);
extern void Gif_ParsePacket(u8* data, u32 size, GIF_PATH path);
extern void Gif_ParsePacket(GS_Packet& gsPack, GIF_PATH path);
@ -105,10 +110,11 @@ struct Gif_Tag {
};
struct GS_Packet {
u32 offset; // Path buffer offset for start of packet
u32 size; // Full size of GS-Packet
s32 cycles; // EE Cycles taken to process this GS packet
bool done; // 0 = Incomplete, 1 = Complete
u32 offset; // Path buffer offset for start of packet
u32 size; // Full size of GS-Packet
s32 cycles; // EE Cycles taken to process this GS packet
s32 readAmount; // Dummy read-amount data needed for proper buffer calculations
bool done; // 0 = Incomplete, 1 = Complete
GS_Packet() { Reset(); }
void Reset() { memzero(*this); }
};
@ -124,8 +130,16 @@ static __fi void incTag(u32& offset, u32& size, u32 incAmount) {
offset += incAmount;
}
// Per-path bookkeeping used only when MTVU (threaded VU1) is enabled:
// tracks placeholder packets owed to MTGS and the queue of completed
// XGkick GS packets produced by the VU1 thread.
struct Gif_Path_MTVU {
u32 fakePackets; // Fake packets pending to be sent to MTGS
Mutex gsPackMutex; // Used for atomic access to gsPackQueue
std::deque<GS_Packet> gsPackQueue; // VU1 programs' XGkick(s)
Gif_Path_MTVU() { Reset(); }
void Reset() { fakePackets = 0; gsPackQueue.clear(); }
};
struct Gif_Path {
volatile s32 __aligned(4) readAmount; // Amount of data MTGS still needs to read
__aligned(4) volatile s32 readAmount; // Amount of data MTGS still needs to read
u8* buffer; // Path packet buffer
u32 buffSize; // Full size of buffer
u32 buffLimit; // Cut off limit to wrap around
@ -135,6 +149,7 @@ struct Gif_Path {
GS_Packet gsPack; // Current GS Packet info
GIF_PATH idx; // Gif Path Index
GIF_PATH_STATE state; // Path State
Gif_Path_MTVU mtvu; // Must be last for saved states
Gif_Path() {}
~Gif_Path() { _aligned_free(buffer); }
@ -156,6 +171,7 @@ struct Gif_Path {
//curOffset = curSize;
return;
}
mtvu.Reset();
curSize = 0;
curOffset = 0;
readAmount = 0;
@ -163,32 +179,38 @@ struct Gif_Path {
gsPack.Reset();
}
bool isMTVU() { return !idx && THREAD_VU1; }
s32 getReadAmount() { return AtomicRead(readAmount) + gsPack.readAmount; }
bool hasDataRemaining() { return curOffset < curSize; }
bool isDone() { return !hasDataRemaining() && state == GIF_PATH_IDLE; }
bool isDone() { return isMTVU() ? !mtvu.fakePackets
: (!hasDataRemaining() && state == GIF_PATH_IDLE); }
// Waits on the MTGS to process gs packets
void mtgsReadWait() {
//pxAssertDev(AtomicExchangeAdd(readAmount, 0) != 0, "Gif Path Buffer Overflow!");
DevCon.WriteLn(Color_Red, "Gif Path[%d] - MTGS Wait! [r=0x%x]",
idx+1, AtomicExchangeAdd(readAmount, 0));
Gif_MTGS_Wait();
if (IsDevBuild) {
DevCon.WriteLn(Color_Red, "Gif Path[%d] - MTGS Wait! [r=0x%x]", idx+1, getReadAmount());
Gif_MTGS_Wait(isMTVU());
DevCon.WriteLn(Color_Green, "Gif Path[%d] - MTGS Wait! [r=0x%x]", idx+1, getReadAmount());
return;
}
Gif_MTGS_Wait(isMTVU());
}
// Moves packet data to start of buffer
void RealignPacket() {
extern void Gif_AddBlankGSPacket(u32 size, GIF_PATH path);
GUNIT_LOG("Path Buffer: Realigning packet!");
s32 offset = curOffset - gsPack.size;
s32 sizeToAdd = curSize - offset;
s32 intersect = sizeToAdd - offset;
if (intersect < 0) intersect = 0;
for(;;) {
s32 frontFree = offset - AtomicExchangeAdd(readAmount, 0);
s32 frontFree = offset - getReadAmount();
if (frontFree >= sizeToAdd - intersect) break;
mtgsReadWait();
}
if (offset < (s32)buffLimit) { // Needed for correct readAmount values
Gif_AddBlankGSPacket(buffLimit - offset, idx);
if (isMTVU()) gsPack.readAmount += buffLimit - offset;
else Gif_AddBlankGSPacket(buffLimit - offset, idx);
}
//DevCon.WriteLn("Realign Packet [%d]", curSize - offset);
if (intersect) memmove(buffer, &buffer[offset], curSize - offset);
@ -200,12 +222,12 @@ struct Gif_Path {
void CopyGSPacketData(u8* pMem, u32 size, bool aligned = false) {
if (curSize + size > buffSize) { // Move gsPack to front of buffer
DevCon.Warning("CopyGSPacketData: Realigning packet!");
GUNIT_LOG("CopyGSPacketData: Realigning packet!");
RealignPacket();
}
for(;;) {
s32 offset = curOffset - gsPack.size;
s32 readPos = offset - AtomicExchangeAdd(readAmount, 0);
s32 readPos = offset - getReadAmount();
if (readPos >= 0) break; // MTGS is reading in back of curOffset
if ((s32)buffLimit + readPos > (s32)curSize + (s32)size) break; // Enough free front space
mtgsReadWait(); // Let MTGS run to free up buffer space
@ -217,12 +239,21 @@ struct Gif_Path {
}
// If completed a GS packet (with EOP) then returned GS_Packet.done = 1
// MTVU: This function should only be called on the EE thread
GS_Packet ExecuteGSPacket() {
if (mtvu.fakePackets) { // For MTVU mode...
mtvu.fakePackets--;
GS_Packet fakePack;
fakePack.done = 1; // Fake packets don't get processed by pcsx2
fakePack.size =~0u; // Used to indicate that its a fake packet
return fakePack;
}
pxAssert(!isMTVU());
for(;;) {
if (!gifTag.isValid) { // Need new Gif Tag
// We don't have enough data for a Gif Tag
if (curOffset + 16 > curSize) {
GUNIT_LOG("Path Buffer: Not enough data for gif tag! [%d]", curSize-curOffset);
//GUNIT_LOG("Path Buffer: Not enough data for gif tag! [%d]", curSize-curOffset);
return gsPack;
}
@ -249,7 +280,7 @@ struct Gif_Path {
while(gifTag.nLoop && !dblSIGNAL) {
if (curOffset + 16 > curSize) return gsPack; // Exit Early
if (gifTag.curReg() == GIF_REG_A_D) {
dblSIGNAL = Gif_HandlerAD(&buffer[curOffset]);
if (!isMTVU()) dblSIGNAL = Gif_HandlerAD(&buffer[curOffset]);
}
incTag(curOffset, gsPack.size, 16); // 1 QWC
gifTag.packedStep();
@ -271,6 +302,84 @@ struct Gif_Path {
}
}
}
// MTVU: Gets called on VU XGkicks on MTVU thread
// Consumes the buffered XGkick data into gsPack.  In dev builds the gif tags
// are walked and A+D writes are checked (via Gif_HandlerAD_Debug) to verify
// that nothing actually required pcsx2-side processing; release builds just
// treat the whole buffered region as one complete GS packet.
void ExecuteGSPacketMTVU() {
// Move packet to start of buffer
if (curOffset > buffLimit) {
RealignPacket();
}
if (IsDevBuild) { // We check the packet to see if it actually
for(;;) { // needed to be processed by pcsx2...
if (curOffset + 16 > curSize) break;
gifTag.setTag(&buffer[curOffset], 1);
if(!gifTag.hasAD && curOffset + 16 + gifTag.len > curSize) break;
incTag(curOffset, gsPack.size, 16); // Tag Size
if (gifTag.hasAD) { // Only can be true if GIF_FLG_PACKED
while(gifTag.nLoop) {
if (curOffset + 16 > curSize) break; // Exit Early
if (gifTag.curReg() == GIF_REG_A_D) {
// Asserts if the register write would have needed pcsx2 handling
pxAssert(!Gif_HandlerAD_Debug(&buffer[curOffset]));
}
incTag(curOffset, gsPack.size, 16); // 1 QWC
gifTag.packedStep();
}
}
else incTag(curOffset, gsPack.size, gifTag.len); // Data length
if (curOffset >= curSize) break;
if (gifTag.tag.EOP) break;
}
// The walk above is expected to consume the buffered data exactly
pxAssert(curOffset == curSize);
gifTag.isValid = false;
}
else {
// We assume every packet is a full GS Packet
// And we don't process anything on pcsx2 side
gsPack.size += curSize - curOffset;
curOffset = curSize;
}
}
// MTVU: Gets called after VU1 execution on MTVU thread
// Publishes the finished gsPack to the MTGS-visible queue, then starts a
// fresh packet at the current buffer offset.
void FinishGSPacketMTVU() {
// if(1) scopes the lock so it is released before gsPack is reset below
if (1) {
ScopedLock lock(mtvu.gsPackMutex);
// Account for both real data and dummy read-amount under readAmount
AtomicExchangeAdd(readAmount, gsPack.size + gsPack.readAmount);
mtvu.gsPackQueue.push_back(gsPack);
}
gsPack.Reset();
gsPack.offset = curOffset;
}
// MTVU: Gets called by MTGS thread
// Returns (a copy of) the oldest queued XGkick packet.  The queue is expected
// to be non-empty here; if it isn't, an error is logged and an empty packet
// (size 0) is returned so the caller can skip the transfer.
GS_Packet GetGSPacketMTVU() {
ScopedLock lock(mtvu.gsPackMutex);
if (mtvu.gsPackQueue.size()) {
GS_Packet t = mtvu.gsPackQueue[0];
return t; // XGkick GS packet(s)
}
Console.Error("MTVU: Expected gsPackQueue to have elements!");
pxAssert(0);
return GS_Packet(); // gsPack.size will be 0
}
// MTVU: Gets called by MTGS thread
// Removes the oldest XGkick packet from the queue (no-op when empty).
void PopGSPacketMTVU() {
	ScopedLock lock(mtvu.gsPackMutex);
	if (!mtvu.gsPackQueue.empty())
		mtvu.gsPackQueue.pop_front();
}
// MTVU: Returns the number of queued GS packets
// that the MTGS thread has not processed yet.
u32 GetPendingGSPackets() {
	ScopedLock lock(mtvu.gsPackMutex);
	return (u32)mtvu.gsPackQueue.size();
}
};
struct Gif_Unit {
@ -280,8 +389,8 @@ struct Gif_Unit {
GIF_TRANSFER_TYPE lastTranType; // Last Transfer Type
Gif_Unit() : stat(gifRegs.stat) {
gifPath[0].Init(GIF_PATH_1, _1mb*8, _16kb + _1kb);
gifPath[1].Init(GIF_PATH_2, _1mb*8, _1mb + _1kb);
gifPath[0].Init(GIF_PATH_1, _1mb*9, _1mb + _1kb);
gifPath[1].Init(GIF_PATH_2, _1mb*9, _1mb + _1kb);
gifPath[2].Init(GIF_PATH_3, _1mb*9, _1mb + _1kb);
}
@ -307,24 +416,24 @@ struct Gif_Unit {
// Adds a finished GS Packet to the MTGS ring buffer
__fi void AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) {
Gif_AddCompletedGSPacket(gsPack, path);
if (gsPack.size==~0u) Gif_AddGSPacketMTVU (gsPack, path);
else Gif_AddCompletedGSPacket(gsPack, path);
if (PRINT_GIF_PACKET) Gif_ParsePacket(gsPack, path);
}
// Returns GS Packet Size in bytes
u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0) {
u32 memMask = pathIdx ? 0xffffffffu : 0x3fffu;
u32 size = 0;
u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0, u32 size = ~0u) {
u32 memMask = pathIdx ? ~0u : 0x3fffu;
u32 curSize = 0;
for(;;) {
Gif_Tag gifTag(&pMem[offset & memMask]);
incTag(offset, size, 16 + gifTag.len); // Tag + Data length
if (pathIdx == GIF_PATH_1 && size >= 0x4000) {
incTag(offset, curSize, 16 + gifTag.len); // Tag + Data length
if (pathIdx == GIF_PATH_1 && curSize >= 0x4000) {
Console.Warning("Gif Unit - GS packet size exceeded VU memory size!");
return 0; // Bios does this... (Fixed if you delay vu1's xgkick by 103 vu cycles)
}
if (gifTag.tag.EOP) {
return size;
}
if (curSize >= size) return size;
if (gifTag.tag.EOP) return curSize;
}
}
@ -332,8 +441,22 @@ struct Gif_Unit {
// The return value is the amount of data (in bytes) that was processed
// If transfer cannot take place at this moment the return value is 0
u32 TransferGSPacketData(GIF_TRANSFER_TYPE tranType, u8* pMem, u32 size, bool aligned=false) {
GIF_LOG("%s - [path=%d][size=%d]", Gif_TransferStr[(tranType>>8)&0xf], (tranType&3)+1, size);
if (THREAD_VU1) {
Gif_Path& path1 = gifPath[GIF_PATH_1];
if (tranType == GIF_TRANS_XGKICK) { // This is on the MTVU thread
path1.CopyGSPacketData(pMem, size, aligned);
path1.ExecuteGSPacketMTVU();
return size;
}
if (tranType == GIF_TRANS_MTVU) { // This is on the EE thread
path1.mtvu.fakePackets++;
if (CanDoGif()) Execute();
return 0;
}
}
GUNIT_LOG("%s - [path=%d][size=%d]", Gif_TransferStr[(tranType>>8)&0xf], (tranType&3)+1, size);
if (size == 0) { GUNIT_WARN("Gif Unit - Size == 0"); return 0; }
if(!CanDoGif()) { GUNIT_WARN("Gif Unit - Signal or PSE Set or Dir = GS to EE"); }
pxAssertDev((stat.APATH==0) || checkPaths(1,1,1), "Gif Unit - APATH wasn't cleared?");
@ -344,6 +467,7 @@ struct Gif_Unit {
}
if (tranType == GIF_TRANS_DMA) {
if(!CanDoPath3()) { if (!Path3Masked()) stat.P3Q = 1; return 0; } // DMA Stall
//if (stat.P2Q) DevCon.WriteLn("P2Q while path 3");
}
if (tranType == GIF_TRANS_XGKICK) {
if(!CanDoPath1()) { stat.P1Q = 1; } // We always buffer path1 packets
@ -404,7 +528,7 @@ struct Gif_Unit {
GS_Packet gsPack = path.ExecuteGSPacket();
if(!gsPack.done) {
if (stat.APATH == 3 && CanDoP3Slice() && !gsSIGNAL.queued) {
if(!didPath3 && checkPaths(1,1,0)) { // Path3 slicing
if(!didPath3 && /*!Path3Masked() &&*/ checkPaths(1,1,0)) { // Path3 slicing
didPath3 = true;
stat.APATH = 0;
stat.IP3 = 1;
@ -433,7 +557,7 @@ struct Gif_Unit {
}
if (!gsSIGNAL.queued && !gifPath[0].isDone()) { stat.APATH = 1; stat.P1Q = 0; }
elif (!gsSIGNAL.queued && !gifPath[1].isDone()) { stat.APATH = 2; stat.P2Q = 0; }
elif (!gsSIGNAL.queued && !gifPath[2].isDone() && !Path3Masked())
elif (!gsSIGNAL.queued && !gifPath[2].isDone() && !Path3Masked() /*&& !stat.P2Q*/)
{ stat.APATH = 3; stat.P3Q = 0; stat.IP3 = 0; }
else { stat.APATH = 0; stat.OPH = 0; break; }
}

View File

@ -19,7 +19,6 @@
#include "Hardware.h"
#include "newVif.h"
#include "IPU/IPUdma.h"
#include "Gif.h"
#include "Gif_Unit.h"
using namespace R5900;

View File

@ -17,7 +17,6 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Hardware.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "ps2/HwInternal.h"

View File

@ -21,6 +21,7 @@
#include "GS.h"
#include "Gif_Unit.h"
#include "MTVU.h"
#include "Elfheader.h"
#include "SamplProf.h"
@ -242,36 +243,29 @@ void SysMtgsThread::OpenPlugin()
GSsetGameCRC( ElfCRC, 0 );
}
class RingBufferLock : public ScopedLock
{
typedef ScopedLock _parent;
protected:
SysMtgsThread& m_mtgs;
struct RingBufferLock {
ScopedLock m_lock1;
ScopedLock m_lock2;
SysMtgsThread& m_mtgs;
public:
RingBufferLock( SysMtgsThread& mtgs )
: ScopedLock( mtgs.m_mtx_RingBufferBusy )
, m_mtgs( mtgs )
{
RingBufferLock(SysMtgsThread& mtgs)
: m_lock1(mtgs.m_mtx_RingBufferBusy),
m_lock2(mtgs.m_mtx_RingBufferBusy2),
m_mtgs(mtgs) {
m_mtgs.m_RingBufferIsBusy = true;
}
virtual ~RingBufferLock() throw()
{
virtual ~RingBufferLock() throw() {
m_mtgs.m_RingBufferIsBusy = false;
}
void Acquire()
{
_parent::Acquire();
void Acquire() {
m_lock1.Acquire();
m_lock2.Acquire();
m_mtgs.m_RingBufferIsBusy = true;
}
void Release()
{
void Release() {
m_mtgs.m_RingBufferIsBusy = false;
_parent::Release();
m_lock2.Release();
m_lock1.Release();
}
};
@ -281,10 +275,9 @@ void SysMtgsThread::ExecuteTaskInThread()
PacketTagType prevCmd;
#endif
RingBufferLock busy( *this );
RingBufferLock busy (*this);
while( true )
{
while(true) {
busy.Release();
// Performance note: Both of these perform cancellation tests, but pthread_testcancel
@ -299,8 +292,7 @@ void SysMtgsThread::ExecuteTaskInThread()
// ever be modified by this thread.
while( m_ReadPos != volatize(m_WritePos))
{
if( EmuConfig.GS.DisableOutput )
{
if (EmuConfig.GS.DisableOutput) {
m_ReadPos = m_WritePos;
continue;
}
@ -327,7 +319,7 @@ void SysMtgsThread::ExecuteTaskInThread()
switch( tag.command )
{
#if COPY_GS_PACKET_TO_MTGS == 1 // d
#if COPY_GS_PACKET_TO_MTGS == 1
case GS_RINGTYPE_P1:
{
uint datapos = (m_ReadPos+1) & RingBufferMask;
@ -412,6 +404,21 @@ void SysMtgsThread::ExecuteTaskInThread()
break;
}
case GS_RINGTYPE_MTVU_GSPACKET: {
MTVU_LOG("MTGS - Waiting on semaXGkick!");
vu1Thread.KickStart(true);
busy.m_lock2.Release();
// Wait for MTVU to complete vu1 program
vu1Thread.semaXGkick.WaitWithoutYield();
busy.m_lock2.Acquire();
Gif_Path& path = gifUnit.gifPath[GIF_PATH_1];
GS_Packet gsPack = path.GetGSPacketMTVU(); // Get vu1 program's xgkick packet(s)
if (gsPack.size) GSgifTransfer((u32*)&path.buffer[gsPack.offset], gsPack.size/16);
AtomicExchangeSub(path.readAmount, gsPack.size + gsPack.readAmount);
path.PopGSPacketMTVU(); // Should be done last, for proper Gif_MTGS_Wait()
break;
}
default:
{
switch( tag.command )
@ -572,27 +579,43 @@ void SysMtgsThread::OnCleanupInThread()
}
// Waits for the GS to empty out the entire ring buffer contents.
// Used primarily for plugin startup/shutdown.
void SysMtgsThread::WaitGS()
// If syncRegs, then writes pcsx2's gs regs to MTGS's internal copy
// If weakWait, then this function is allowed to exit after MTGS finished a path1 packet
// If isMTVU, then this implies this function is being called from the MTVU thread...
void SysMtgsThread::WaitGS(bool syncRegs, bool weakWait, bool isMTVU)
{
pxAssertDev( !IsSelf(), "This method is only allowed from threads *not* named MTGS." );
if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return;
if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return;
if( volatize(m_ReadPos) != m_WritePos )
{
Gif_Path& path = gifUnit.gifPath[GIF_PATH_1];
u32 startP1Packs = weakWait ? path.GetPendingGSPackets() : 0;
if (isMTVU || volatize(m_ReadPos) != m_WritePos) {
SetEvent();
RethrowException();
do {
m_mtx_RingBufferBusy.Wait();
for(;;) {
if (weakWait) m_mtx_RingBufferBusy2.Wait();
else m_mtx_RingBufferBusy .Wait();
RethrowException();
} while( volatize(m_ReadPos) != m_WritePos );
if(!isMTVU && volatize(m_ReadPos) == m_WritePos) break;
u32 curP1Packs = weakWait ? path.GetPendingGSPackets() : 0;
if (weakWait && ((startP1Packs-curP1Packs) || !curP1Packs)) break;
// On weakWait we will stop waiting on the MTGS thread if the
// MTGS thread has processed a vu1 xgkick packet, or is pending on
// its final vu1 xgkick packet (!curP1Packs)...
// Note: m_WritePos doesn't seem to have proper atomic write
// code, so reading it from the MTVU thread might be dangerous;
// hence it has been avoided...
}
}
// Completely synchronize GS and MTGS register states.
memcpy_fast( RingBuffer.Regs, PS2MEM_GS, sizeof(RingBuffer.Regs) );
if (syncRegs) {
ScopedLock lock(m_mtx_WaitGS);
// Completely synchronize GS and MTGS register states.
memcpy_fast(RingBuffer.Regs, PS2MEM_GS, sizeof(RingBuffer.Regs));
}
}
// Sets the gsEvent flag and releases a timeslice.

37
pcsx2/MTVU.cpp Normal file
View File

@ -0,0 +1,37 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "Common.h"
#include "MTVU.h"
#include "newVif.h"
__aligned16 VU_Thread vu1Thread(CpuVU1, VU1);
// Calls the vif unpack functions from the MTVU thread
void MTVU_Unpack(void* data, VIFregisters& vifRegs) {
bool isFill = vifRegs.cycle.cl < vifRegs.cycle.wl;
if (newVifDynaRec) dVifUnpack<1>((u8*)data, isFill);
else _nVifUnpack(1, (u8*)data, vifRegs.mode, isFill);
}
// Called on Saving/Loading states...
// Serializes MTVU state into/out of a savestate. Only the vu-cycle history
// (vuCycles/vuCycleIdx) is stored; the ring buffer itself is required to be
// fully drained (IsDone) before any state operation, hence the assert.
void SaveStateBase::mtvuFreeze() {
FreezeTag("MTVU");
// The VU worker must have no pending packets at savestate time.
pxAssert(vu1Thread.IsDone());
// On load, discard any leftover ring-buffer/vif state from the prior session.
if (!IsSaving()) vu1Thread.Reset();
Freeze(vu1Thread.vuCycles);
Freeze(vu1Thread.vuCycleIdx);
}

305
pcsx2/MTVU.h Normal file
View File

@ -0,0 +1,305 @@
#pragma once
#include "System/SysThreads.h"
#include "Vif.h"
#include "Vif_Dma.h"
#include "VUmicro.h"
#include "Gif_Unit.h"
extern void MTVU_Unpack(void* data, VIFregisters& vifRegs);
#define volatize(x) (*reinterpret_cast<volatile uint*>(&(x)))
#define size_u32(x) (((u32)x+3u)>>2) // Rounds up a size in bytes for size in u32's
#define MTVU_ALWAYS_KICK 0
#define MTVU_SYNC_MODE 0
#define MTVU_LOG(...) do{} while(0)
//#define MTVU_LOG DevCon.WriteLn
enum MTVU_EVENT {
MTVU_VU_EXECUTE, // Execute VU program
MTVU_VU_WRITE_MICRO, // Write to VU micro-mem
MTVU_VU_WRITE_DATA, // Write to VU data-mem
MTVU_VIF_WRITE_COL, // Write to Vif col reg
MTVU_VIF_WRITE_ROW, // Write to Vif row reg
MTVU_VIF_UNPACK, // Execute Vif Unpack
MTVU_NULL_PACKET, // Go back to beginning of buffer
MTVU_RESET
};
// Notes:
// - This class should only be accessed from the EE thread...
// - buffer_size must be power of 2
// - ring-buffer has no complete pending packets when read_pos==write_pos
// Single-producer/single-consumer command ring buffer driving the VU1 worker
// thread. The EE thread is the only writer (write_pos/write_offset) and the
// VU thread is the only reader (read_pos); cross-thread reads go through the
// Atomic* helpers. Packets are tagged with an MTVU_EVENT value followed by
// that event's payload.
struct VU_Thread : public pxThread {
static const u32 buffer_size = (_1mb * 16) / sizeof(u32);
static const u32 buffer_mask = buffer_size - 1;
__aligned(4) u32 buffer[buffer_size];
__aligned(4) volatile s32 read_pos; // Only modified by VU thread
__aligned(4) volatile bool isBusy; // Is thread processing data?
__aligned(4) s32 write_pos; // Only modified by EE thread
__aligned(4) s32 write_offset; // Only modified by EE thread
__aligned(4) Mutex mtxBusy; // Held by the worker while it drains packets
__aligned(4) Semaphore semaEvent; // Posted by EE to wake the worker
__aligned(4) Semaphore semaXGkick; // Posted per finished vu1 program (MTGS waits on it)
__aligned(4) BaseVUmicroCPU*& vuCPU;
__aligned(4) VURegs& vuRegs;
__aligned16 vifStruct vif; // Worker-local copy of vif state
__aligned16 VIFregisters vifRegs; // Worker-local copy of vif registers
__aligned(4) u32 vuCycles[4]; // Used for VU cycle stealing hack
__aligned(4) u32 vuCycleIdx; // Used for VU cycle stealing hack
VU_Thread(BaseVUmicroCPU*& _vuCPU, VURegs& _vuRegs) :
vuCPU(_vuCPU), vuRegs(_vuRegs) {
m_name = L"MTVU";
Reset();
}
virtual ~VU_Thread() throw() {
pxThread::Cancel();
}
void InitThread() {
Start(); // Starts the pxThread
}
// Clears ring-buffer positions and worker-local vif/cycle state.
// NOTE(review): only safe while the worker is idle — confirm at call sites.
void Reset() {
read_pos = 0;
write_pos = 0;
write_offset = 0;
vuCycleIdx = 0;
isBusy = false;
memzero(vif);
memzero(vifRegs);
memzero(vuCycles);
}
protected:
// Should only be called by ReserveSpace().
// Spins until the reader has freed 'size' u32's in front of write_pos,
// kicking the worker and briefly taking mtxBusy to yield to it.
__ri void WaitOnSize(s32 size) {
for(;;) {
s32 readPos = GetReadPos();
if (readPos <= write_pos) break; // MTVU is reading in back of write_pos
if (readPos > write_pos + size) break; // Enough free front space
if (1) { // Let MTVU run to free up buffer space
KickStart();
if (IsDevBuild) DevCon.WriteLn("WaitOnSize()");
// Blocks until the worker releases mtxBusy (i.e. has made progress).
ScopedLock lock(mtxBusy);
}
}
}
// Makes sure there's enough room in the ring buffer
// to write a continuous 'size * sizeof(u32)' bytes.
// If the packet would straddle the end of the buffer, a MTVU_NULL_PACKET
// is emitted so the reader wraps, and write_pos restarts at 0.
void ReserveSpace(s32 size) {
pxAssert(write_pos < buffer_size);
pxAssert(size < buffer_size);
pxAssert(size > 0);
pxAssert(write_offset == 0);
if (write_pos + size > buffer_size) {
pxAssert(write_pos > 0);
WaitOnSize(1); // Size of MTVU_NULL_PACKET
Write(MTVU_NULL_PACKET);
write_offset = 0;
AtomicExchange(volatize(write_pos), 0);
}
WaitOnSize(size);
}
// Use this when reading read_pos from ee thread
__fi volatile s32 GetReadPos() {
return AtomicRead(read_pos);
}
// Use this when reading write_pos from vu thread
__fi volatile s32 GetWritePos() {
return AtomicRead(volatize(write_pos));
}
// Gets the effective write pointer after adding write_offset
__fi u32* GetWritePtr() {
return &buffer[(write_pos + write_offset) & buffer_mask];
}
__fi void incReadPos(s32 offset) { // Offset in u32 sizes
s32 temp = (read_pos + offset) & buffer_mask;
AtomicExchange(read_pos, temp);
}
// Publishes the staged write_offset to write_pos in one atomic store,
// making the whole packet visible to the reader at once.
__fi void incWritePos() { // Adds write_offset
s32 temp = (write_pos + write_offset) & buffer_mask;
write_offset = 0;
AtomicExchange(volatize(write_pos), temp);
if (MTVU_ALWAYS_KICK) KickStart();
if (MTVU_SYNC_MODE) WaitVU();
}
// Reader-side: pop a single u32 tag/value.
__fi u32 Read() {
u32 ret = buffer[read_pos];
incReadPos(1);
return ret;
}
__fi void Read(void* dest, u32 size) { // Size in bytes
memcpy_fast(dest, &buffer[read_pos], size);
incReadPos(size_u32(size));
}
// Writer-side: stage data at write_pos+write_offset (not yet visible
// to the reader until incWritePos()).
__fi void Write(u32 val) {
GetWritePtr()[0] = val;
write_offset += 1;
}
__fi void Write(void* src, u32 size) { // Size in bytes
memcpy_fast(GetWritePtr(), src, size);
write_offset += size_u32(size);
}
// pxThread entry point; wraps the packet loop in page-fault protection.
void ExecuteTaskInThread() {
PCSX2_PAGEFAULT_PROTECT {
ExecuteRingBuffer();
} PCSX2_PAGEFAULT_EXCEPT;
}
// Worker main loop: sleep on semaEvent, then drain all pending packets
// while holding mtxBusy / isBusy so waiters can synchronize against us.
void ExecuteRingBuffer() {
for(;;) {
semaEvent.WaitWithoutYield();
ScopedLockBool lock(mtxBusy, isBusy);
while (read_pos != GetWritePos()) {
u32 tag = Read();
switch (tag) {
case MTVU_VU_EXECUTE: {
// Payload: start addr (-1 = resume), vif top, vif itop.
vuRegs.cycle = 0;
s32 addr = Read();
vifRegs.top = Read();
vifRegs.itop = Read();
if (addr != -1) vuRegs.VI[REG_TPC].UL = addr;
vuCPU->Execute(vu1RunCycles);
gifUnit.gifPath[GIF_PATH_1].FinishGSPacketMTVU();
semaXGkick.Post(); // Tell MTGS a path1 packet is complete
AtomicExchange(vuCycles[vuCycleIdx], vuRegs.cycle);
vuCycleIdx = (vuCycleIdx + 1) & 3;
break;
}
case MTVU_VU_WRITE_MICRO: {
// Payload: addr, size, then 'size' bytes of program data.
u32 vu_micro_addr = Read();
u32 size = Read();
vuCPU->Clear(vu_micro_addr, size); // Invalidate cached translations first
Read(&vuRegs.Micro[vu_micro_addr], size);
break;
}
case MTVU_VU_WRITE_DATA: {
// Payload: addr, size, then 'size' bytes of data memory.
u32 vu_data_addr = Read();
u32 size = Read();
Read(&vuRegs.Mem[vu_data_addr], size);
break;
}
case MTVU_VIF_WRITE_COL:
Read(&vif.MaskCol, sizeof(vif.MaskCol));
break;
case MTVU_VIF_WRITE_ROW:
Read(&vif.MaskRow, sizeof(vif.MaskRow));
break;
case MTVU_VIF_UNPACK: {
// Payload: vif struct copy, vifRegs copy, size, then raw data.
u32 vif_copy_size = (uptr)&vif.StructEnd - (uptr)&vif.tag;
Read(&vif.tag, vif_copy_size);
Read(&vifRegs, sizeof(vifRegs));
u32 size = Read();
// Unpack reads straight out of the ring buffer (no extra copy).
MTVU_Unpack(&buffer[read_pos], vifRegs);
incReadPos(size_u32(size));
break;
}
case MTVU_NULL_PACKET:
// Wrap marker: restart reads at the buffer's beginning.
AtomicExchange(read_pos, 0);
break;
jNO_DEFAULT;
}
}
}
}
// Returns Average number of vu Cycles from last 4 runs
u32 Get_vuCycles() { // Used for vu cycle stealing hack
return (AtomicRead(vuCycles[0]) + AtomicRead(vuCycles[1])
+ AtomicRead(vuCycles[2]) + AtomicRead(vuCycles[3])) >> 2;
}
public:
// Get MTVU to start processing its packets if it isn't already
void KickStart(bool forceKick = false) {
if ((forceKick && !semaEvent.Count())
|| (!isBusy && GetReadPos() != write_pos)) semaEvent.Post();
}
// Used for assertions...
bool IsDone() { return !isBusy && GetReadPos() == GetWritePos(); }
// Waits till MTVU is done processing every queued packet (EE-side barrier).
void WaitVU() {
MTVU_LOG("MTVU - WaitVU!");
for(;;) {
if (IsDone()) break;
//DevCon.WriteLn("WaitVU()");
pxAssert(THREAD_VU1);
KickStart();
// Yields until the worker drops mtxBusy, then re-checks IsDone().
ScopedLock lock(mtxBusy);
}
}
// Queues a vu1 program execution and charges the EE an estimated cycle
// cost (capped at 3000) scaled by the VU-cycle-steal speedhack setting.
void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop) {
MTVU_LOG("MTVU - ExecuteVU!");
ReserveSpace(4);
Write(MTVU_VU_EXECUTE);
Write(vu_addr);
Write(vif_top);
Write(vif_itop);
incWritePos();
// Registers a placeholder path1 GS packet for the program's xgkicks.
gifUnit.TransferGSPacketData(GIF_TRANS_MTVU, NULL, 0);
KickStart();
u32 cycles = std::min(Get_vuCycles(), 3000u);
cpuRegs.cycle += cycles * EmuConfig.Speedhacks.VUCycleSteal;
}
// Queues a vif unpack: copies vif state, vifRegs, and the raw data.
void VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32 size) {
MTVU_LOG("MTVU - VifUnpack!");
u32 vif_copy_size = (uptr)&_vif.StructEnd - (uptr)&_vif.tag;
ReserveSpace(1 + size_u32(vif_copy_size) + size_u32(sizeof(_vifRegs)) + 1 + size_u32(size));
Write(MTVU_VIF_UNPACK);
Write(&_vif.tag, vif_copy_size);
Write(&_vifRegs, sizeof(_vifRegs));
Write(size);
Write(data, size);
incWritePos();
KickStart();
}
// Writes to VU's Micro Memory (size in bytes)
void WriteMicroMem(u32 vu_micro_addr, void* data, u32 size) {
MTVU_LOG("MTVU - WriteMicroMem!");
ReserveSpace(3 + size_u32(size));
Write(MTVU_VU_WRITE_MICRO);
Write(vu_micro_addr);
Write(size);
Write(data, size);
incWritePos();
}
// Writes to VU's Data Memory (size in bytes)
void WriteDataMem(u32 vu_data_addr, void* data, u32 size) {
MTVU_LOG("MTVU - WriteDataMem!");
ReserveSpace(3 + size_u32(size));
Write(MTVU_VU_WRITE_DATA);
Write(vu_data_addr);
Write(size);
Write(data, size);
incWritePos();
}
// Queues a copy of the vif MaskCol register into the worker's vif state.
void WriteCol(vifStruct& _vif) {
MTVU_LOG("MTVU - WriteCol!");
ReserveSpace(1 + size_u32(sizeof(_vif.MaskCol)));
Write(MTVU_VIF_WRITE_COL);
Write(&_vif.MaskCol, sizeof(_vif.MaskCol));
incWritePos();
}
// Queues a copy of the vif MaskRow register into the worker's vif state.
void WriteRow(vifStruct& _vif) {
MTVU_LOG("MTVU - WriteRow!");
ReserveSpace(1 + size_u32(sizeof(_vif.MaskRow)));
Write(MTVU_VIF_WRITE_ROW);
Write(&_vif.MaskRow, sizeof(_vif.MaskRow));
incWritePos();
}
};
extern __aligned16 VU_Thread vu1Thread;

View File

@ -38,8 +38,9 @@ BIOS
#include <wx/file.h>
#include "IopCommon.h"
#include "VUmicro.h"
#include "GS.h"
#include "VUmicro.h"
#include "MTVU.h"
#include "ps2/HwInternal.h"
#include "ps2/BiosTools.h"
@ -102,6 +103,7 @@ static vtlbHandler
vu0_micro_mem,
vu1_micro_mem,
vu1_data_mem,
hw_by_page[0x10] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
@ -131,7 +133,11 @@ void memMapVUmicro()
// VU0/VU1 memory (data)
// VU0 is 4k, mirrored 4 times across a 16k area.
vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000);
vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000);
// Note: In order for the below conditional to work correctly
// support needs to be coded to reset the memMappings when MTVU is
// turned off/on. For now we just always use the vu data handlers...
if (1||THREAD_VU1) vtlb_MapHandler(vu1_data_mem,0x1100c000,0x00004000);
else vtlb_MapBlock (VU1.Mem, 0x1100c000,0x00004000);
}
void memMapPhy()
@ -431,127 +437,185 @@ static void __fastcall _ext_memWrite128(u32 mem, const mem128_t *value)
typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc );
template<int vunum>
static __fi void ClearVuFunc( u32 addr, u32 size )
{
if( vunum==0 )
CpuVU0->Clear(addr,size);
else
CpuVU1->Clear(addr,size);
// Notifies the selected VU's cpu provider that program memory in
// [addr, addr+size) changed, so cached translations get cleared.
template<int vunum> static __fi void ClearVuFunc(u32 addr, u32 size) {
	BaseVUmicroCPU* const cpu = vunum ? CpuVU1 : CpuVU0;
	cpu->Clear(addr, size);
}
template<int vunum>
static mem8_t __fastcall vuMicroRead8(u32 addr)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
// VU Micro Memory Reads...
// Reads one byte of VU micro memory. When threaded VU1 is active, waits for
// the MTVU worker to drain first so the memory isn't read mid-program.
template<int vunum> static mem8_t __fc vuMicroRead8(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return vu->Micro[addr];
}
template<int vunum>
static mem16_t __fastcall vuMicroRead16(u32 addr)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
// 16-bit VU micro memory read; syncs with the MTVU worker when VU1 is threaded.
template<int vunum> static mem16_t __fc vuMicroRead16(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u16*)&vu->Micro[addr];
}
template<int vunum>
static mem32_t __fastcall vuMicroRead32(u32 addr)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
// 32-bit VU micro memory read; syncs with the MTVU worker when VU1 is threaded.
template<int vunum> static mem32_t __fc vuMicroRead32(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Micro[addr];
}
template<int vunum>
static void __fastcall vuMicroRead64(u32 addr,mem64_t* data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
// 64-bit VU micro memory read (result via out-param); syncs with MTVU first.
template<int vunum> static void __fc vuMicroRead64(u32 addr,mem64_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Micro[addr];
}
template<int vunum>
static void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
// 128-bit (quadword) VU micro memory read; syncs with MTVU first.
template<int vunum> static void __fc vuMicroRead128(u32 addr,mem128_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Micro[addr]);
}
// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
// frame in-game, and usually none at all after BIOS), so cpu clears aren't much of a big deal.
template<int vunum>
static void __fastcall vuMicroWrite8(u32 addr,mem8_t data)
{
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (vu.Micro[addr]!=data)
{
ClearVuFunc<vunum>(addr&(~7), 8); // Clear before writing new data (clearing 8 bytes because an instruction is 8 bytes) (cottonvibes)
vu.Micro[addr]=data;
// VU micro memory writes (8/16/32/64/128-bit). When VU1 is threaded, the
// write is forwarded to the MTVU worker (WriteMicroMem) instead of touching
// VU1.Micro directly; otherwise the write happens in place, clearing cached
// translations first if the value actually changes.
template<int vunum> static void __fc vuMicroWrite8(u32 addr,mem8_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k micro mem; VU0: 4k
if (vunum && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, &data, sizeof(u8));
return;
}
if (vu->Micro[addr]!=data) { // Clear before writing new data
ClearVuFunc<vunum>(addr, 8); //(clearing 8 bytes because an instruction is 8 bytes) (cottonvibes)
vu->Micro[addr] =data;
}
}
// 16-bit micro mem write; same threaded-forward / clear-on-change pattern.
template<int vunum> static void __fc vuMicroWrite16(u32 addr, mem16_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, &data, sizeof(u16));
return;
}
if (*(u16*)&vu->Micro[addr]!=data) {
ClearVuFunc<vunum>(addr, 8);
*(u16*)&vu->Micro[addr] =data;
}
}
// 32-bit micro mem write; same threaded-forward / clear-on-change pattern.
template<int vunum> static void __fc vuMicroWrite32(u32 addr, mem32_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, &data, sizeof(u32));
return;
}
if (*(u32*)&vu->Micro[addr]!=data) {
ClearVuFunc<vunum>(addr, 8);
*(u32*)&vu->Micro[addr] =data;
}
}
// 64-bit micro mem write; same threaded-forward / clear-on-change pattern.
template<int vunum> static void __fc vuMicroWrite64(u32 addr, const mem64_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, (void*)data, sizeof(u64));
return;
}
if (*(u64*)&vu->Micro[addr]!=data[0]) {
ClearVuFunc<vunum>(addr, 8);
*(u64*)&vu->Micro[addr] =data[0];
}
}
// 128-bit micro mem write; clears a full 16 bytes (two instructions) on change.
template<int vunum> static void __fc vuMicroWrite128(u32 addr, const mem128_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, (void*)data, sizeof(u128));
return;
}
if ((u128&)vu->Micro[addr]!=*data) {
ClearVuFunc<vunum>(addr, 16);
CopyQWC(&vu->Micro[addr],data);
}
}
template<int vunum>
static void __fastcall vuMicroWrite16(u32 addr,mem16_t data)
{
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u16*)&vu.Micro[addr]!=data)
{
ClearVuFunc<vunum>(addr&(~7), 8);
*(u16*)&vu.Micro[addr]=data;
}
// VU Data Memory Reads...
// VU data memory reads (8/16/32/64/128-bit). When VU1 is threaded, waits
// for the MTVU worker to finish before reading so the EE sees settled data.
template<int vunum> static mem8_t __fc vuDataRead8(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k data mem; VU0: 4k
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return vu->Mem[addr];
}
// 16-bit data mem read; same MTVU-sync pattern.
template<int vunum> static mem16_t __fc vuDataRead16(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u16*)&vu->Mem[addr];
}
// 32-bit data mem read; same MTVU-sync pattern.
template<int vunum> static mem32_t __fc vuDataRead32(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Mem[addr];
}
// 64-bit data mem read (out-param); same MTVU-sync pattern.
template<int vunum> static void __fc vuDataRead64(u32 addr, mem64_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Mem[addr];
}
// 128-bit (quadword) data mem read; same MTVU-sync pattern.
template<int vunum> static void __fc vuDataRead128(u32 addr, mem128_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Mem[addr]);
}
template<int vunum>
static void __fastcall vuMicroWrite32(u32 addr,mem32_t data)
{
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u32*)&vu.Micro[addr]!=data)
{
ClearVuFunc<vunum>(addr&(~7), 8);
*(u32*)&vu.Micro[addr]=data;
// VU Data Memory Writes...
// VU data memory writes (8/16/32/64/128-bit). When VU1 is threaded the write
// is queued to the MTVU worker (WriteDataMem); otherwise it lands directly in
// vu->Mem. Unlike micro mem, no recompiler clear is needed for data memory.
template<int vunum> static void __fc vuDataWrite8(u32 addr, mem8_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; // VU1: 16k data mem; VU0: 4k
if (vunum && THREAD_VU1) {
vu1Thread.WriteDataMem(addr, &data, sizeof(u8));
return;
}
vu->Mem[addr] = data;
}
// 16-bit data mem write; same threaded-forward pattern.
template<int vunum> static void __fc vuDataWrite16(u32 addr, mem16_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteDataMem(addr, &data, sizeof(u16));
return;
}
*(u16*)&vu->Mem[addr] = data;
}
// 32-bit data mem write; same threaded-forward pattern.
template<int vunum> static void __fc vuDataWrite32(u32 addr, mem32_t data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteDataMem(addr, &data, sizeof(u32));
return;
}
*(u32*)&vu->Mem[addr] = data;
}
// 64-bit data mem write; same threaded-forward pattern.
template<int vunum> static void __fc vuDataWrite64(u32 addr, const mem64_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteDataMem(addr, (void*)data, sizeof(u64));
return;
}
*(u64*)&vu->Mem[addr] = data[0];
}
// 128-bit (quadword) data mem write; same threaded-forward pattern.
template<int vunum> static void __fc vuDataWrite128(u32 addr, const mem128_t* data) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) {
vu1Thread.WriteDataMem(addr, (void*)data, sizeof(u128));
return;
}
CopyQWC(&vu->Mem[addr], data);
}
template<int vunum>
static void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data)
{
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u64*)&vu.Micro[addr]!=data[0])
{
ClearVuFunc<vunum>(addr&(~7), 8);
*(u64*)&vu.Micro[addr]=data[0];
}
}
template<int vunum>
static void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
{
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if ((u128&)vu.Micro[addr] != *data)
{
ClearVuFunc<vunum>(addr&(~7), 16);
CopyQWC(&vu.Micro[addr],data);
}
}
void memSetPageAddr(u32 vaddr, u32 paddr)
{
@ -640,9 +704,8 @@ void eeMemoryReserve::Commit()
// Resets memory mappings, unmaps TLBs, reloads bios roms, etc.
void eeMemoryReserve::Reset()
{
if (!mmap_faultHandler)
{
pxAssume(Source_PageFault);
if(!mmap_faultHandler) {
pxAssert(Source_PageFault);
mmap_faultHandler = new mmap_PageFaultHandler();
}
@ -674,7 +737,8 @@ void eeMemoryReserve::Reset()
// Dynarec versions of VUs
vu0_micro_mem = vtlb_RegisterHandlerTempl1(vuMicro,0);
vu1_micro_mem = vtlb_RegisterHandlerTempl1(vuMicro,1);
vu1_data_mem = (1||THREAD_VU1) ? vtlb_RegisterHandlerTempl1(vuData,1) : NULL;
//////////////////////////////////////////////////////////////////////////////////////////
// IOP's "secret" Hardware Register mapping, accessible from the EE (and meant for use
// by debugging or BIOS only). The IOP's hw regs are divided into three main pages in

View File

@ -64,6 +64,7 @@ void Pcsx2Config::SpeedhackOptions::LoadSave( IniInterface& ini )
IniBitBool( WaitLoop );
IniBitBool( vuFlagHack );
IniBitBool( vuBlockHack );
IniBitBool( vuThread );
}
void Pcsx2Config::ProfilerOptions::LoadSave( IniInterface& ini )

View File

@ -21,6 +21,7 @@
#include "R3000A.h"
#include "VUmicro.h"
#include "COP0.h"
#include "MTVU.h"
#include "System/SysThreads.h"
#include "R5900Exceptions.h"
@ -54,6 +55,7 @@ extern SysMainMemory& GetVmMemory();
void cpuReset()
{
vu1Thread.WaitVU();
if (GetMTGS().IsOpen())
GetMTGS().WaitGS(); // GS better be done processing before we reset the EE, just in case.
@ -281,9 +283,6 @@ static __fi void _cpuTestInterrupts()
TESTINT(DMAC_GIF, gifInterrupt);
TESTINT(DMAC_SIF0, EEsif0Interrupt);
TESTINT(DMAC_SIF1, EEsif1Interrupt);
//extern void Gif_Execute();
//TESTINT(DMAC_GIF_UNIT, Gif_Execute);
// Profile-guided Optimization (sorta)
// The following ints are rarely called. Encasing them in a conditional

View File

@ -18,6 +18,7 @@
#include "SPR.h"
#include "VUmicro.h"
#include "MTVU.h"
extern void mfifoGIFtransfer(int);
@ -31,19 +32,23 @@ void sprInit()
{
}
static void TestClearVUs(u32 madr, u32 size)
static void TestClearVUs(u32 madr, u32 qwc)
{
if (madr >= 0x11000000)
{
if (madr < 0x11004000)
{
DbgCon.Warning("scratch pad clearing vu0");
CpuVU0->Clear(madr&0xfff, size);
CpuVU0->Clear(madr&0xfff, qwc * 16);
}
else if (madr >= 0x11008000 && madr < 0x1100c000)
{
DbgCon.Warning("scratch pad clearing vu1");
CpuVU1->Clear(madr&0x3fff, size);
if (THREAD_VU1) {
DevCon.Error("MTVU Warning: SPR Accessing VU1 Memory!!!");
vu1Thread.WaitVU();
}
CpuVU1->Clear(madr&0x3fff, qwc * 16);
}
}
}
@ -83,7 +88,7 @@ int _SPR0chain()
memcpy_qwc(pMem, &psSu128(spr0ch.sadr), partialqwc);
// clear VU mem also!
TestClearVUs(spr0ch.madr, partialqwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
TestClearVUs(spr0ch.madr, partialqwc);
spr0ch.madr += partialqwc << 4;
spr0ch.sadr += partialqwc << 4;
@ -135,7 +140,7 @@ void _SPR0interleave()
case NO_MFD:
case MFD_RESERVED:
// clear VU mem also!
TestClearVUs(spr0ch.madr, spr0ch.qwc << 2);
TestClearVUs(spr0ch.madr, spr0ch.qwc);
memcpy_qwc(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc);
break;
}

View File

@ -21,6 +21,7 @@
#include "ps2/BiosTools.h"
#include "COP0.h"
#include "VUmicro.h"
#include "MTVU.h"
#include "Cache.h"
#include "AppConfig.h"
@ -150,10 +151,9 @@ static const uint MainMemorySizeInBytes =
SaveStateBase& SaveStateBase::FreezeMainMemory()
{
if (IsLoading())
PreLoadPrep();
else
m_memory->MakeRoomFor( m_idx + MainMemorySizeInBytes );
vu1Thread.WaitVU(); // Finish VU1 just in-case...
if (IsLoading()) PreLoadPrep();
else m_memory->MakeRoomFor( m_idx + MainMemorySizeInBytes );
// First Block - Memory Dumps
// ---------------------------
@ -175,8 +175,8 @@ SaveStateBase& SaveStateBase::FreezeMainMemory()
SaveStateBase& SaveStateBase::FreezeInternals()
{
if( IsLoading() )
PreLoadPrep();
vu1Thread.WaitVU(); // Finish VU1 just in-case...
if (IsLoading()) PreLoadPrep();
// Second Block - Various CPU Registers and States
// -----------------------------------------------

View File

@ -24,7 +24,7 @@
// the lower 16 bit value. IF the change is breaking of all compatibility with old
// states, increment the upper 16 bit value, and clear the lower 16 bits to 0.
static const u32 g_SaveVersion = (0x9A02 << 16) | 0x0000;
static const u32 g_SaveVersion = (0x9A03 << 16) | 0x0000;
// this function is meant to be used in the place of GSfreeze, and provides a safe layer
// between the GS saving function and the MTGS's needs. :)
@ -193,6 +193,7 @@ protected:
// Load/Save functions for the various components of our glorious emulator!
void mtvuFreeze();
void rcntFreeze();
void vuMicroFreeze();
void vif0Freeze();

View File

@ -154,7 +154,7 @@ protected:
// implemented by the provisioning interface.
extern SysCpuProviderPack& GetCpuProviders();
extern void SysLogMachineCaps(); // Detects cpu type and fills cpuInfo structs.
extern void SysLogMachineCaps(); // Detects cpu type and fills cpuInfo structs.
extern void SysClearExecutionCache(); // clears recompiled execution caches!
extern void SysOutOfMemory_EmergencyResponse(uptr blocksize);

View File

@ -19,10 +19,9 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include <cmath>
#include "VUmicro.h"
#include "MTVU.h"
#ifdef PCSX2_DEBUG
u32 vudump = 0;
@ -39,6 +38,10 @@ void vu1ResetRegs()
}
void vu1Finish() {
if (THREAD_VU1) {
if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100");
return;
}
while (VU0.VI[REG_VPU_STAT].UL & 0x100) {
VUM_LOG("vu1ExecMicro > Stalling until current microprogram finishes");
CpuVU1->Execute(vu1RunCycles);
@ -47,10 +50,15 @@ void vu1Finish() {
void __fastcall vu1ExecMicro(u32 addr)
{
if (THREAD_VU1) {
vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop);
vif1Regs.stat.VEW = false;
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
return;
}
static int count = 0;
vu1Finish();
VUM_LOG("vu1ExecMicro %x", addr);
VUM_LOG("vu1ExecMicro %x (count=%d)", addr, count++);
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;

View File

@ -18,6 +18,7 @@
#include "Common.h"
#include "VUmicro.h"
#include "MTVU.h"
extern void _vuFlushAll(VURegs* VU);
@ -173,6 +174,14 @@ InterpVU1::InterpVU1()
IsInterpreter = true;
}
// Drain the MTVU worker before the interpreter is reset, so no vu1 program
// is still executing against state that is about to be reinitialized.
void InterpVU1::Reset() {
vu1Thread.WaitVU();
}
// Likewise, make sure the MTVU worker is idle before shutdown.
void InterpVU1::Shutdown() {
vu1Thread.WaitVU();
}
void InterpVU1::Step()
{
VU1.VI[REG_TPC].UL &= VU1_PROGMASK;

View File

@ -193,8 +193,8 @@ public:
wxString GetLongName() const { return L"VU1 Interpreter"; }
void Reserve() { }
void Shutdown() throw() { }
void Reset() { }
void Shutdown() throw();
void Reset();
void Step();
void Execute(u32 cycles);

View File

@ -54,7 +54,8 @@ void vuMemoryReserve::Reset()
pxAssert( VU0.Mem );
pxAssert( VU1.Mem );
memMapVUmicro();
// Below memMap is already called by "void eeMemoryReserve::Reset()"
//memMapVUmicro();
// === VU0 Initialization ===
memzero(VU0.ACC);

View File

@ -18,6 +18,7 @@
#include "VUops.h"
#include "GS.h"
#include "Gif_Unit.h"
#include "MTVU.h"
#include <cmath>
@ -2018,7 +2019,8 @@ static __ri void _vuEEXP(VURegs * VU) {
static __ri void _vuXITOP(VURegs * VU) {
if (_It_ == 0) return;
VU->VI[_It_].US[0] = VU->GetVifRegs().itop;
if (VU==&VU1 && THREAD_VU1) VU->VI[_It_].US[0] = vu1Thread.vifRegs.itop;
else VU->VI[_It_].US[0] = VU->GetVifRegs().itop;
}
static __ri void _vuXGKICK(VURegs * VU)
@ -2041,7 +2043,8 @@ static __ri void _vuXGKICK(VURegs * VU)
static __ri void _vuXTOP(VURegs * VU) {
if(_It_ == 0) return;
VU->VI[_It_].US[0] = (u16)VU->GetVifRegs().top;
if (VU==&VU1 && THREAD_VU1) VU->VI[_It_].US[0] = (u16)vu1Thread.vifRegs.top;
else VU->VI[_It_].US[0] = (u16)VU->GetVifRegs().top;
}
#define GET_VF0_FLAG(reg) (((reg)==0)?(1<<REG_VF0_FLAG):0)

View File

@ -20,6 +20,7 @@
#include "newVif.h"
#include "GS.h"
#include "Gif.h"
#include "MTVU.h"
__aligned16 vifStruct vif0, vif1;
@ -289,18 +290,18 @@ __fi void vif1STAT(u32 value) {
#define caseVif(x) (idx ? VIF1_##x : VIF0_##x)
_vifT __fi u32 vifRead32(u32 mem) {
vifStruct& vif = GetVifX;
vifStruct& vif = MTVU_VifX;
bool wait = idx && THREAD_VU1;
switch (mem) {
case caseVif(ROW0): return vif.MaskRow._u32[0];
case caseVif(ROW1): return vif.MaskRow._u32[1];
case caseVif(ROW2): return vif.MaskRow._u32[2];
case caseVif(ROW3): return vif.MaskRow._u32[3];
case caseVif(ROW0): if (wait) vu1Thread.WaitVU(); return vif.MaskRow._u32[0];
case caseVif(ROW1): if (wait) vu1Thread.WaitVU(); return vif.MaskRow._u32[1];
case caseVif(ROW2): if (wait) vu1Thread.WaitVU(); return vif.MaskRow._u32[2];
case caseVif(ROW3): if (wait) vu1Thread.WaitVU(); return vif.MaskRow._u32[3];
case caseVif(COL0): return vif.MaskCol._u32[0];
case caseVif(COL1): return vif.MaskCol._u32[1];
case caseVif(COL2): return vif.MaskCol._u32[2];
case caseVif(COL3): return vif.MaskCol._u32[3];
case caseVif(COL0): if (wait) vu1Thread.WaitVU(); return vif.MaskCol._u32[0];
case caseVif(COL1): if (wait) vu1Thread.WaitVU(); return vif.MaskCol._u32[1];
case caseVif(COL2): if (wait) vu1Thread.WaitVU(); return vif.MaskCol._u32[2];
case caseVif(COL3): if (wait) vu1Thread.WaitVU(); return vif.MaskCol._u32[3];
}
return psHu32(mem);
@ -334,15 +335,15 @@ _vifT __fi bool vifWrite32(u32 mem, u32 value) {
// standard register writes -- handled by caller.
break;
case caseVif(ROW0): vif.MaskRow._u32[0] = value; return false;
case caseVif(ROW1): vif.MaskRow._u32[1] = value; return false;
case caseVif(ROW2): vif.MaskRow._u32[2] = value; return false;
case caseVif(ROW3): vif.MaskRow._u32[3] = value; return false;
case caseVif(ROW0): vif.MaskRow._u32[0] = value; if (idx && THREAD_VU1) vu1Thread.WriteRow(vif); return false;
case caseVif(ROW1): vif.MaskRow._u32[1] = value; if (idx && THREAD_VU1) vu1Thread.WriteRow(vif); return false;
case caseVif(ROW2): vif.MaskRow._u32[2] = value; if (idx && THREAD_VU1) vu1Thread.WriteRow(vif); return false;
case caseVif(ROW3): vif.MaskRow._u32[3] = value; if (idx && THREAD_VU1) vu1Thread.WriteRow(vif); return false;
case caseVif(COL0): vif.MaskCol._u32[0] = value; return false;
case caseVif(COL1): vif.MaskCol._u32[1] = value; return false;
case caseVif(COL2): vif.MaskCol._u32[2] = value; return false;
case caseVif(COL3): vif.MaskCol._u32[3] = value; return false;
case caseVif(COL0): vif.MaskCol._u32[0] = value; if (idx && THREAD_VU1) vu1Thread.WriteCol(vif); return false;
case caseVif(COL1): vif.MaskCol._u32[1] = value; if (idx && THREAD_VU1) vu1Thread.WriteCol(vif); return false;
case caseVif(COL2): vif.MaskCol._u32[2] = value; if (idx && THREAD_VU1) vu1Thread.WriteCol(vif); return false;
case caseVif(COL3): vif.MaskCol._u32[3] = value; if (idx && THREAD_VU1) vu1Thread.WriteCol(vif); return false;
}
// fall-through case: issue standard writeback behavior.

View File

@ -106,6 +106,7 @@ union tVIF_STAT {
};
u32 _u32;
tVIF_STAT() {}
tVIF_STAT(u32 val) { _u32 = val; }
bool test(u32 flags) const { return !!(_u32 & flags); }
void set_flags (u32 flags) { _u32 |= flags; }
@ -145,6 +146,7 @@ union tVIF_ERR {
};
u32 _u32;
tVIF_ERR() {}
tVIF_ERR (u32 val) { _u32 = val; }
void write(u32 val) { _u32 = val; }
bool test (u32 flags) const { return !!(_u32 & flags); }
@ -221,6 +223,9 @@ static VIFregisters& vif1Regs = (VIFregisters&)eeHw[0x3C00];
#define vifXch (idx ? (vif1ch) : (vif0ch))
#define vifXRegs (idx ? (vif1Regs) : (vif0Regs))
#define MTVU_VifX (idx ? ((THREAD_VU1) ? vu1Thread.vif : vif1) : (vif0))
#define MTVU_VifXRegs (idx ? ((THREAD_VU1) ? vu1Thread.vifRegs : vif1Regs) : (vif0Regs))
extern void dmaVIF0();
extern void dmaVIF1();
extern void mfifoVIF1transfer(int qwc);

View File

@ -17,7 +17,6 @@
#include "Common.h"
#include "Vif_Dma.h"
#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "VUmicro.h"
#include "newVif.h"

View File

@ -16,7 +16,6 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Vif.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Vif_Dma.h"

View File

@ -16,11 +16,11 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Vif_Dma.h"
#include "newVif.h"
#include "VUmicro.h"
#include "MTVU.h"
#define vifOp(vifCodeName) _vifT int __fastcall vifCodeName(int pass, const u32 *data)
#define pass1 if (pass == 0)
@ -36,7 +36,7 @@ vifOp(vifCode_Null);
static __fi void vifFlush(int idx) {
if (!idx) vif0FLUSH();
else vif1FLUSH();
else vif1FLUSH();
}
static __fi void vuExecMicro(int idx, u32 addr) {
@ -70,14 +70,16 @@ static __fi void vuExecMicro(int idx, u32 addr) {
}
}
if(!idx)startcycles = VU0.cycle;
else startcycles = VU1.cycle;
if (!idx) startcycles = VU0.cycle;
else startcycles = VU1.cycle;
if (!idx) vu0ExecMicro(addr);
else vu1ExecMicro(addr);
if(!idx) { g_vu0Cycles += (VU0.cycle-startcycles); g_packetsizeonvu = vif0.vifpacketsize; }
else { g_vu1Cycles += (VU1.cycle-startcycles); g_packetsizeonvu = vif1.vifpacketsize; }
if (!idx || !THREAD_VU1) {
if (!idx) { g_vu0Cycles += (VU0.cycle-startcycles); g_packetsizeonvu = vif0.vifpacketsize; }
else { g_vu1Cycles += (VU1.cycle-startcycles); g_packetsizeonvu = vif1.vifpacketsize; }
}
//DevCon.Warning("Ran VU%x, VU0 Cycles %x, VU1 Cycles %x, start %x cycle %x", idx, g_vu0Cycles, g_vu1Cycles, startcycles, VU1.cycle);
GetVifX.vifstalled = true;
}
@ -225,11 +227,14 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
VURegs& VUx = idx ? VU1 : VU0;
pxAssert(VUx.Micro > 0);
if (idx && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
return;
}
if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
// Clear VU memory before writing!
// (VUs expect size to be 32-bit scale, same as VIF's internal working sizes)
if (!idx) CpuVU0->Clear(addr, size);
else CpuVU1->Clear(addr, size);
if (!idx) CpuVU0->Clear(addr, size*4);
else CpuVU1->Clear(addr, size*4);
memcpy_fast(VUx.Micro + addr, data, size*4);
}
}
@ -387,7 +392,9 @@ vifOp(vifCode_STCol) {
return 1;
}
pass2 {
return _vifCode_STColRow<idx>(data, &vifX.MaskCol._u32[vifX.tag.addr]);
u32 ret = _vifCode_STColRow<idx>(data, &vifX.MaskCol._u32[vifX.tag.addr]);
if (idx && THREAD_VU1) { vu1Thread.WriteCol(vifX); }
return ret;
}
pass3 { VifCodeLog("STCol"); }
return 0;
@ -401,7 +408,9 @@ vifOp(vifCode_STRow) {
return 1;
}
pass2 {
return _vifCode_STColRow<idx>(data, &vifX.MaskRow._u32[vifX.tag.addr]);
u32 ret = _vifCode_STColRow<idx>(data, &vifX.MaskRow._u32[vifX.tag.addr]);
if (idx && THREAD_VU1) { vu1Thread.WriteRow(vifX); }
return ret;
}
pass3 { VifCodeLog("STRow"); }
return 0;
@ -447,7 +456,9 @@ vifOp(vifCode_Unpack) {
vifUnpackSetup<idx>(data);
return 1;
}
pass2 { return nVifUnpack<idx>((u8*)data); }
pass2 {
return nVifUnpack<idx>((u8*)data);
}
pass3 {
vifStruct& vifX = GetVifX;
VIFregisters& vifRegs = vifXRegs;

View File

@ -56,14 +56,18 @@ union tTRXREG {
// NOTE, if debugging vif stalls, use sega classics, spyro, gt4, and taito
struct vifStruct {
u128 MaskRow, MaskCol;
__aligned16 u128 MaskRow;
__aligned16 u128 MaskCol;
struct { // These must be together for MTVU
vifCode tag;
int cmd;
int cl;
u8 usn;
u8 StructEnd; // Address of this is used to calculate end of struct
};
vifCode tag;
int cmd;
int irq;
int cl;
int qwcalign;
u8 usn;
bool done;
bool vifstalled;
@ -72,17 +76,13 @@ struct vifStruct {
// GS registers used for calculating the size of the last local->host transfer initiated on the GS
// Transfer size calculation should be restricted to GS emulation in the future
tBITBLTBUF BITBLTBUF;
tTRXREG TRXREG;
u32 GSLastDownloadSize;
tTRXREG TRXREG;
u32 GSLastDownloadSize;
u8 irqoffset; // 32bit offset where next vif code is
u32 savedtag; // need this for backwards compat with save states
u8 irqoffset; // 32bit offset where next vif code is
u32 vifpacketsize;
u8 inprogress;
u32 lastcmd;
u8 dmamode;
u8 Unused_GifWaitState; // Only here for saved state compatibility
//u8 GifWaitState; // 0 = General PATH checking, 1 = Flush path 3, 2 == Wait for VU1
u8 inprogress;
u8 dmamode;
};
extern __aligned16 vifStruct vif0, vif1;

View File

@ -94,7 +94,6 @@ _vifT void vifTransferLoop(u32* &data) {
vifCmdHandler[idx][vifX.cmd & 0x7f](0, data);
data++; pSize--;
vifX.lastcmd = (vifXRegs.code >> 24) & 0x7f;
if (analyzeIbit<idx>(data, iBit)) break;
continue;
}

View File

@ -17,6 +17,7 @@
#include "Common.h"
#include "Vif.h"
#include "Vif_Dma.h"
#include "MTVU.h"
enum UnpackOffset {
OFFSET_X = 0,
@ -36,10 +37,10 @@ template< uint idx, uint mode, bool doMask >
static __ri void writeXYZW(u32 offnum, u32 &dest, u32 data) {
int n = 0;
vifStruct& vif = GetVifX;
vifStruct& vif = MTVU_VifX;
if (doMask) {
const VIFregisters& regs = vifXRegs;
const VIFregisters& regs = MTVU_VifXRegs;
switch (vif.cl) {
case 0: n = (regs.mask >> (offnum * 2)) & 0x3; break;
case 1: n = (regs.mask >> ( 8 + (offnum * 2))) & 0x3; break;

View File

@ -23,21 +23,24 @@
#endif
#include "GS.h"
#include "MTVU.h"
void AllThreeThreads::LoadWithCurrentTimes()
void AllPCSX2Threads::LoadWithCurrentTimes()
{
ee = GetCoreThread().GetCpuTime();
gs = GetMTGS().GetCpuTime();
vu = vu1Thread.GetCpuTime();
ui = GetThreadCpuTime();
update = GetCPUTicks();
}
AllThreeThreads AllThreeThreads::operator-( const AllThreeThreads& right ) const
AllPCSX2Threads AllPCSX2Threads::operator-( const AllPCSX2Threads& right ) const
{
AllThreeThreads retval;
AllPCSX2Threads retval;
retval.ee = ee - right.ee;
retval.gs = gs - right.gs;
retval.vu = vu - right.vu;
retval.ui = ui - right.ui;
retval.update = update - right.update;
@ -48,6 +51,7 @@ DefaultCpuUsageProvider::DefaultCpuUsageProvider()
{
m_pct_ee = 0;
m_pct_gs = 0;
m_pct_vu = 0;
m_pct_ui = 0;
m_writepos = 0;
@ -69,16 +73,17 @@ void DefaultCpuUsageProvider::UpdateStats()
{
// Measure deltas between the first and last positions in the ring buffer:
AllThreeThreads& newone( m_queue[m_writepos] );
AllPCSX2Threads& newone( m_queue[m_writepos] );
newone.LoadWithCurrentTimes();
m_writepos = (m_writepos+1) % QueueDepth;
const AllThreeThreads deltas( newone - m_queue[m_writepos] );
const AllPCSX2Threads deltas( newone - m_queue[m_writepos] );
// get the real time passed, scaled to the Thread's tick frequency.
u64 timepass = (deltas.update * GetThreadTicksPerSecond()) / GetTickFrequency();
m_pct_ee = (deltas.ee * 100) / timepass;
m_pct_gs = (deltas.gs * 100) / timepass;
m_pct_vu = (deltas.vu * 100) / timepass;
m_pct_ui = (deltas.ui * 100) / timepass;
}
@ -92,6 +97,11 @@ int DefaultCpuUsageProvider::GetGsPct() const
return m_pct_gs;
}
int DefaultCpuUsageProvider::GetVUPct() const
{
return m_pct_vu;
}
int DefaultCpuUsageProvider::GetGuiPct() const
{
return m_pct_ui;

View File

@ -27,6 +27,7 @@ public:
virtual void UpdateStats()=0;
virtual int GetEEcorePct() const=0;
virtual int GetGsPct() const=0;
virtual int GetVUPct() const=0;
virtual int GetGuiPct() const=0;
};
@ -44,16 +45,17 @@ public:
virtual void UpdateStats() { m_Implementation->UpdateStats(); }
virtual int GetEEcorePct() const { return m_Implementation->GetEEcorePct(); }
virtual int GetGsPct() const { return m_Implementation->GetGsPct(); }
virtual int GetVUPct() const { return m_Implementation->GetVUPct(); }
virtual int GetGuiPct() const { return m_Implementation->GetGuiPct(); }
};
struct AllThreeThreads
struct AllPCSX2Threads
{
u64 ee, gs, ui;
u64 ee, gs, vu, ui;
u64 update;
void LoadWithCurrentTimes();
AllThreeThreads operator-( const AllThreeThreads& right ) const;
AllPCSX2Threads operator-( const AllPCSX2Threads& right ) const;
};
class DefaultCpuUsageProvider :
@ -64,11 +66,12 @@ public:
static const uint QueueDepth = 4;
protected:
AllThreeThreads m_queue[QueueDepth];
AllPCSX2Threads m_queue[QueueDepth];
uint m_writepos;
u32 m_pct_ee;
u32 m_pct_gs;
u32 m_pct_vu;
u32 m_pct_ui;
public:
@ -80,6 +83,7 @@ public:
void UpdateStats();
int GetEEcorePct() const;
int GetGsPct() const;
int GetVUPct() const;
int GetGuiPct() const;
protected:

View File

@ -55,6 +55,7 @@ public:
void UpdateStats();
int GetEEcorePct() const;
int GetGsPct() const;
int GetVUPct() const;
int GetGuiPct() const;
};
@ -264,6 +265,11 @@ int CpuUsageProviderMSW::GetGsPct() const
return 0;
}
int CpuUsageProviderMSW::GetVUPct() const
{
return 0;
}
int CpuUsageProviderMSW::GetGuiPct() const
{
return 0;

View File

@ -533,10 +533,18 @@ void GSFrame::OnUpdateTitle( wxTimerEvent& evt )
}
FastFormatUnicode cpuUsage;
if( m_CpuUsage.IsImplemented() )
{
if (m_CpuUsage.IsImplemented()) {
m_CpuUsage.UpdateStats();
cpuUsage.Write( L" | EE: %3d%% | GS: %3d%% | UI: %3d%%", m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(), m_CpuUsage.GetGuiPct() );
if (THREAD_VU1) { // Display VU thread's usage
cpuUsage.Write(L" | EE: %3d%% | GS: %3d%% | VU: %3d%% | UI: %3d%%",
m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(),
m_CpuUsage.GetVUPct(), m_CpuUsage.GetGuiPct());
}
else {
cpuUsage.Write(L" | EE: %3d%% | GS: %3d%% | UI: %3d%%",
m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(),
m_CpuUsage.GetGuiPct());
}
}
const u64& smode2 = *(u64*)PS2GS_BASE(GS_SMODE2);

View File

@ -335,6 +335,7 @@ namespace Panels
pxCheckBox* m_check_fastCDVD;
pxCheckBox* m_check_vuFlagHack;
pxCheckBox* m_check_vuBlockHack;
pxCheckBox* m_check_vuThread;
public:
virtual ~SpeedHacksPanel() throw() {}

View File

@ -161,10 +161,13 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
wxPanelWithHelpers* vuHacksPanel = new wxPanelWithHelpers( right, wxVERTICAL, _("microVU Hacks") );
m_check_vuFlagHack = new pxCheckBox( vuHacksPanel, _("mVU Flag Hack"),
_("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc... [Recommended]") );
_("Good Speedup and High Compatibility; may cause bad graphics... [Recommended]" ) );
m_check_vuBlockHack = new pxCheckBox( vuHacksPanel, _("mVU Block Hack"),
_("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc...") );
_("Good Speedup and High Compatibility; may cause bad graphics, SPS, etc...") );
m_check_vuThread = new pxCheckBox( vuHacksPanel, _("MTVU (Multi-Threaded microVU1)"),
_("Good Speedup and High Compatibility; may cause hanging... [Recommended if 3+ cores]") );
m_check_vuFlagHack->SetToolTip( pxEt( "!ContextTip:Speedhacks:vuFlagHack",
L"Updates Status Flags only on blocks which will read them, instead of all the time. "
@ -176,6 +179,12 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
L"This should be pretty safe. It is unknown if this breaks any game..."
) );
m_check_vuThread->SetToolTip( pxEt( "!ContextTip:Speedhacks:vuThread",
L"Runs VU1 on its own thread (microVU1-only). Generally a speedup on CPUs with 3 or more cores. "
L"This is safe for most games, but a few games are incompatible and may hang. "
L"In the case of GS limited games, it may be a slowdown (especially on dual core CPUs)."
) );
// ------------------------------------------------------------------------
// All other hacks Section:
@ -226,7 +235,8 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
*vuHacksPanel += m_check_vuFlagHack;
*vuHacksPanel += m_check_vuBlockHack;
*vuHacksPanel += 57; // Aligns left and right boxes in default language and font size
*vuHacksPanel += m_check_vuThread;
//*vuHacksPanel += 57; // Aligns left and right boxes in default language and font size
*miscHacksPanel += m_check_intc;
*miscHacksPanel += m_check_waitloop;
@ -304,6 +314,7 @@ void Panels::SpeedHacksPanel::ApplyConfigToGui( AppConfig& configToApply, int fl
m_check_vuFlagHack ->SetValue(opts.vuFlagHack);
m_check_vuBlockHack ->SetValue(opts.vuBlockHack);
m_check_vuThread ->SetValue(opts.vuThread);
m_check_intc ->SetValue(opts.IntcStat);
m_check_waitloop ->SetValue(opts.WaitLoop);
m_check_fastCDVD ->SetValue(opts.fastCDVD);
@ -333,6 +344,7 @@ void Panels::SpeedHacksPanel::Apply()
opts.IntcStat = m_check_intc->GetValue();
opts.vuFlagHack = m_check_vuFlagHack->GetValue();
opts.vuBlockHack = m_check_vuBlockHack->GetValue();
opts.vuThread = m_check_vuThread->GetValue();
// If the user has a command line override specified, we need to disable it
// so that their changes take effect

View File

@ -17,6 +17,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Hardware.h"
#include "MTVU.h"
#include "IPU/IPUdma.h"
#include "ps2/HwInternal.h"
@ -91,7 +92,7 @@ __fi void setDmacStat(u32 num)
}
// Note: Dma addresses are guaranteed to be aligned to 16 bytes (128 bits)
__fi tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write)
__fi tDMA_TAG* SPRdmaGetAddr(u32 addr, bool write)
{
// if (addr & 0xf) { DMA_LOG("*PCSX2*: DMA address not 128bit aligned: %8.8x", addr); }
@ -114,6 +115,10 @@ __fi tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write)
}
else if ((addr >= 0x11004000) && (addr < 0x11010000))
{
if (THREAD_VU1) {
DevCon.Error("MTVU: SPRdmaGetAddr Accessing VU Memory!");
vu1Thread.WaitVU();
}
//Access for VU Memory
return (tDMA_TAG*)vtlb_GetPhyPtr(addr & 0x1FFFFFF0);
}

View File

@ -41,7 +41,7 @@
using namespace R5900;
using namespace vtlb_private;
#define verify pxAssume
#define verify pxAssert
namespace vtlb_private
{
@ -512,14 +512,14 @@ void vtlb_MapBlock(void* base, u32 start, u32 size, u32 blocksize)
{
verify(0==(start&VTLB_PAGE_MASK));
verify(0==(size&VTLB_PAGE_MASK) && size>0);
if (!blocksize)
if(!blocksize)
blocksize = size;
verify(0==(blocksize&VTLB_PAGE_MASK) && blocksize>0);
verify(0==(size%blocksize));
s32 baseint = (s32)base;
u32 end = start + (size - VTLB_PAGE_SIZE);
pxAssume( (end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap) );
verify((end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap));
while (start <= end)
{
@ -544,7 +544,7 @@ void vtlb_Mirror(u32 new_region,u32 start,u32 size)
verify(0==(size&VTLB_PAGE_MASK) && size>0);
u32 end = start + (size-VTLB_PAGE_SIZE);
pxAssume( (end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap) );
verify((end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap));
while(start <= end)
{

View File

@ -1322,6 +1322,14 @@
<Filter
Name="VU"
>
<File
RelativePath="..\..\MTVU.cpp"
>
</File>
<File
RelativePath="..\..\MTVU.h"
>
</File>
<File
RelativePath="..\..\VU.h"
>

View File

@ -99,6 +99,7 @@ void mVUreset(microVU& mVU, bool resetReserve) {
mVU.prog.x86start = z;
mVU.prog.x86ptr = z;
mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);
//memset(mVU.prog.x86start, 0xcc, mVU.cacheSize*_1mb);
for(u32 i = 0; i < (mVU.progSize / 2); i++) {
if(!mVU.prog.prog[i]) {
@ -279,7 +280,6 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) {
//------------------------------------------------------------------
// recMicroVU0 / recMicroVU1
//------------------------------------------------------------------
recMicroVU0::recMicroVU0() { m_Idx = 0; IsInterpreter = false; }
recMicroVU1::recMicroVU1() { m_Idx = 1; IsInterpreter = false; }
void recMicroVU0::Vsync() throw() { mVUvsyncUpdate(microVU0); }
@ -290,8 +290,10 @@ void recMicroVU0::Reserve() {
mVUinit(microVU0, 0);
}
void recMicroVU1::Reserve() {
if (AtomicExchange(m_Reserved, 1) == 0)
if (AtomicExchange(m_Reserved, 1) == 0) {
mVUinit(microVU1, 1);
vu1Thread.InitThread();
}
}
void recMicroVU0::Shutdown() throw() {
@ -299,8 +301,10 @@ void recMicroVU0::Shutdown() throw() {
mVUclose(microVU0);
}
void recMicroVU1::Shutdown() throw() {
if (AtomicExchange(m_Reserved, 0) == 1)
if (AtomicExchange(m_Reserved, 0) == 1) {
vu1Thread.WaitVU();
mVUclose(microVU1);
}
}
void recMicroVU0::Reset() {
@ -309,6 +313,7 @@ void recMicroVU0::Reset() {
}
void recMicroVU1::Reset() {
if(!pxAssertDev(m_Reserved, "MicroVU1 CPU Provider has not been reserved prior to reset!")) return;
vu1Thread.WaitVU();
mVUreset(microVU1, true);
}
@ -325,8 +330,10 @@ void recMicroVU0::Execute(u32 cycles) {
void recMicroVU1::Execute(u32 cycles) {
pxAssert(m_Reserved); // please allocate me first! :|
if(!(VU0.VI[REG_VPU_STAT].UL & 0x100)) return;
((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, vu1RunCycles);
if (!THREAD_VU1) {
if(!(VU0.VI[REG_VPU_STAT].UL & 0x100)) return;
}
((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, cycles);
}
void recMicroVU0::Clear(u32 addr, u32 size) {

View File

@ -24,8 +24,8 @@ using namespace x86Emitter;
#include <algorithm>
#include "Common.h"
#include "VU.h"
#include "MTVU.h"
#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "iR5900.h"
#include "R5900OpcodeTables.h"
@ -217,9 +217,11 @@ struct microVU {
VURegs& regs() const { return ::vuRegs[index]; }
__fi VIFregisters& getVifRegs() const { return regs().GetVifRegs(); }
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
__fi VIFregisters& getVifRegs() const {
return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs();
}
};
// microVU rec structs

View File

@ -77,8 +77,10 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);
if (isEbit || isVU1) { // Clear 'is busy' Flags
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
xAND(ptr32[&mVU.getVifRegs().stat], ~VIF1_STAT_VEW); // Clear VU 'is busy' signal for vif
if (!mVU.index || !THREAD_VU1) {
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
xAND(ptr32[&mVU.getVifRegs().stat], ~VIF1_STAT_VEW); // Clear VU 'is busy' signal for vif
}
}
if (isEbit != 2) { // Save PC, and Jump to Exit Point

View File

@ -199,7 +199,10 @@ _mVUt void mVUcleanUp() {
mVU.cycles = mVU.totalCycles - mVU.cycles;
mVU.regs().cycle += mVU.cycles;
cpuRegs.cycle += ((mVU.cycles < 3000) ? mVU.cycles : 3000) * EmuConfig.Speedhacks.VUCycleSteal;
if (!vuIndex || !THREAD_VU1) {
cpuRegs.cycle += std::min(mVU.cycles, 3000u) * EmuConfig.Speedhacks.VUCycleSteal;
}
//static int ax = 0; ax++;
//if (!(ax % 100000)) {
// for (u32 i = 0; i < (mVU.progSize / 2); i++) {

View File

@ -239,7 +239,14 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
}
// Gets called by mVUaddrFix at execution-time
static void __fastcall mVUwarningRegAccess(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); }
static void __fc mVUwarningRegAccess(u32 prog, u32 pc) {
Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
}
static void __fc mVUwaitMTVU() {
if (IsDevBuild) DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!");
if (THREAD_VU1) vu1Thread.WaitVU();
}
// Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const x32& gprReg)
@ -249,28 +256,31 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
xSHL(gprReg, 4);
}
else {
if (IsDevBuild && !isCOP2) mVUbackupRegs(mVU, true);
xTEST(gprReg, 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
xForwardJump8 jmpB;
xForwardJump32 jmpB;
jmpA.SetTarget();
if (IsDevBuild && !isCOP2) { // Lets see which games do this!
xPUSH(gprT1); // Note: Kernel does it via COP2 to initialize VU1!
xPUSH(gprT2); // So we don't spam console, we'll only check micro-mode...
if (THREAD_VU1 || (IsDevBuild && !isCOP2)) {
mVUbackupRegs(mVU, true);
xPUSH(gprT1);
xPUSH(gprT2);
xPUSH(gprT3);
xMOV (gprT2, mVU.prog.cur->idx);
xMOV (gprT3, xPC);
xCALL(mVUwarningRegAccess);
if (IsDevBuild && !isCOP2) { // Lets see which games do this!
xMOV (gprT2, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV (gprT3, xPC); // So we don't spam console, we'll only check micro-mode...
xCALL(mVUwarningRegAccess);
}
xCALL(mVUwaitMTVU);
xPOP (gprT3);
xPOP (gprT2);
xPOP (gprT1);
mVUrestoreRegs(mVU, true);
}
xAND(gprReg, 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
if (IsDevBuild && !isCOP2) mVUrestoreRegs(mVU, true);
}
}

View File

@ -57,7 +57,6 @@ _vifT extern void dVifUnpack (const u8* data, bool isFill);
// nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are
// used as the hash bucket selector.
//
struct __aligned16 nVifBlock {
u8 num; // [00] Num Field
u8 upkType; // [01] Unpack Type [usn*1:mask*1:upk*4]
@ -74,6 +73,8 @@ struct __aligned16 nVifBlock {
#define _tParams nVifBlock, _hSize, _cmpS
struct nVifStruct {
__aligned16 nVifBlock block;
// Buffer for partial transfers (should always be first to ensure alignment)
// Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword)
__aligned16 u8 buffer[256*16];

View File

@ -19,30 +19,28 @@
#include "PrecompiledHeader.h"
#include "newVif_UnpackSSE.h"
#include "MTVU.h"
static __aligned16 nVifBlock _vBlock = {0};
void dVifReserve(int idx)
{
if (!nVif[idx].recReserve)
void dVifReserve(int idx) {
if(!nVif[idx].recReserve)
nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx));
nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec );
}
void dVifReset(int idx) {
pxAssertDev(nVif[idx].recReserve, "Dynamic VIF recompiler reserve must be created prior to VIF use or reset!");
if (!nVif[idx].vifBlocks)
if(!nVif[idx].vifBlocks)
nVif[idx].vifBlocks = new HashBucket<_tParams>();
else
nVif[idx].vifBlocks->clear();
nVif[idx].recReserve->Reset();
nVif[idx].numBlocks = 0;
nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr();
nVif[idx].numBlocks = 0;
nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr();
//memset(nVif[idx].recWritePtr, 0xcc, nVif[idx].recReserveSizeMB * _1mb);
}
void dVifClose(int idx) {
@ -74,7 +72,8 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo
}
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
const vifStruct& vif = v.idx ? vif1 : vif0;
const int idx = v.idx;
const vifStruct& vif = MTVU_VifX;
u32 m0 = vB.mask;
u32 m1 = m0 & 0xaaaaaaaa;
@ -126,7 +125,8 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
}
void VifUnpackSSE_Dynarec::writeBackRow() const {
xMOVAPS(ptr128[&((v.idx ? vif1 : vif0).MaskRow)], xmmRow);
const int idx = v.idx;
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
// ToDo: Do we need to write back to vifregs.rX too!? :/
}
@ -208,25 +208,25 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
}
_vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, bool isFill) {
vifStruct& vif = GetVifX;
const VURegs& VU = vuRegs[idx];
const uint vuMemLimit = idx ? 0x4000 : 0x1000;
nVifStruct& v = nVif[idx];
vifStruct& vif = MTVU_VifX;
const VURegs& VU = vuRegs[idx];
const uint vuMemLimit = idx ? 0x4000 : 0x1000;
u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10));
u8* endmem = VU.Mem + vuMemLimit;
uint length = (_vBlock.num > 0) ? (_vBlock.num * 16) : 4096; // 0 = 256
u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10));
u8* endmem = VU.Mem + vuMemLimit;
uint length = (v.block.num > 0) ? (v.block.num * 16) : 4096; // 0 = 256
if (!isFill) {
// Accounting for skipping mode: Subtract the last skip cycle, since the skipped part of the run
// shouldn't count as wrapped data. Otherwise, a trailing skip can cause the emu to drop back
// to the interpreter. -- Refraction (test with MGS3)
uint skipSize = (cl - wl) * 16;
uint blocks = _vBlock.num / wl;
uint blocks = v.block.num / wl;
length += (blocks-1) * skipSize;
}
if ( (startmem+length) <= endmem ) {
if ((startmem + length) <= endmem) {
return startmem;
}
//Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, vif.tag.addr, vif.tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl);
@ -245,12 +245,12 @@ static __fi void dVifRecLimit(int idx) {
}
}
_vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill)
_vifT static __ri bool dVifExecuteUnpack(const u8* data, bool isFill)
{
const nVifStruct& v = nVif[idx];
VIFregisters& vifRegs = vifXRegs;
nVifStruct& v = nVif[idx];
VIFregisters& vifRegs = MTVU_VifXRegs;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
if (nVifBlock* b = v.vifBlocks->find(&v.block)) {
if (u8* dest = dVifsetVUptr<idx>(vifRegs.cycle.cl, vifRegs.cycle.wl, isFill)) {
//DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
@ -266,39 +266,37 @@ _vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill)
_vifT __fi void dVifUnpack(const u8* data, bool isFill) {
const nVifStruct& v = nVif[idx];
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
nVifStruct& v = nVif[idx];
vifStruct& vif = MTVU_VifX;
VIFregisters& vifRegs = MTVU_VifXRegs;
const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5);
const int doMask = isFill? 1 : (vif.cmd & 0x10);
const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5);
const int doMask = isFill? 1 : (vif.cmd & 0x10);
_vBlock.upkType = upkType;
_vBlock.num = (u8&)vifRegs.num;
_vBlock.mode = (u8&)vifRegs.mode;
_vBlock.cl = vifRegs.cycle.cl;
_vBlock.wl = vifRegs.cycle.wl;
v.block.upkType = upkType;
v.block.num = (u8&)vifRegs.num;
v.block.mode = (u8&)vifRegs.mode;
v.block.cl = vifRegs.cycle.cl;
v.block.wl = vifRegs.cycle.wl;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? vifRegs.mask : 0;
v.block.mask = doMask ? vifRegs.mask : 0;
//DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
//DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
// _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
// doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
// v.Block.num, v.Block.upkType, v.Block.scl, v.Block.cl, v.Block.wl, v.Block.mode,
// doMask >> 4, doMask ? wxsFormat( L"0x%08x", v.Block.mask ).c_str() : L"ignored"
//);
if (dVifExecuteUnpack<idx>(data, isFill)) return;
xSetPtr(v.recWritePtr);
_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
v.vifBlocks->add(_vBlock);
VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
v.block.startPtr = (uptr)xGetAlignedCallTarget();
v.vifBlocks->add(v.block);
VifUnpackSSE_Dynarec(v, v.block).CompileRoutine();
nVif[idx].recWritePtr = xGetPtr();
// [TODO] : Ideally we should test recompile buffer limits prior to each instruction,
// which would be safer and more memory efficient than using an 0.25 meg recEnd marker.
dVifRecLimit(idx);
// Run the block we just compiled. Various conditions may force us to still use

View File

@ -21,6 +21,7 @@
#include "Common.h"
#include "Vif_Dma.h"
#include "newVif.h"
#include "MTVU.h"
__aligned16 nVifStruct nVif[2];
@ -75,7 +76,7 @@ nVifStruct::nVifStruct()
vifBlocks = NULL;
numBlocks = 0;
recReserveSizeMB = 8;
recReserveSizeMB = 8;
}
void reserveNewVif(int idx)
@ -87,8 +88,8 @@ void resetNewVif(int idx)
// Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have
// changed for some reason.
nVif[idx].idx = idx;
nVif[idx].bSize = 0;
nVif[idx].idx = idx;
nVif[idx].bSize = 0;
memzero(nVif[idx].buffer);
if (newVifDynaRec) dVifReset(idx);
@ -106,8 +107,8 @@ static __fi u8* getVUptr(uint idx, int offset) {
_vifT int nVifUnpack(const u8* data) {
nVifStruct& v = nVif[idx];
vifStruct& vif = GetVifX;
nVifStruct& v = nVif[idx];
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
const uint ret = aMin(vif.vifpacketsize, vif.tag.size);
@ -118,6 +119,7 @@ _vifT int nVifUnpack(const u8* data) {
if (v.bSize) { // Last transfer was partial
memcpy_fast(&v.buffer[v.bSize], data, size);
v.bSize += size;
size = v.bSize;
data = v.buffer;
vif.cl = 0;
@ -125,8 +127,11 @@ _vifT int nVifUnpack(const u8* data) {
if (!vifRegs.num) vifRegs.num = 256;
}
if (newVifDynaRec) dVifUnpack<idx>(data, isFill);
else _nVifUnpack(idx, data, vifRegs.mode, isFill);
if (!idx || !THREAD_VU1) {
if (newVifDynaRec) dVifUnpack<idx>(data, isFill);
else _nVifUnpack(idx, data, vifRegs.mode, isFill);
}
else vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, size);
vif.tag.size = 0;
vif.cmd = 0;
@ -147,12 +152,10 @@ _vifT int nVifUnpack(const u8* data) {
// We can optimize the calculation either way as some games have big partial chunks (Guitar Hero).
// Skipping writes are easy, filling is a bit more complex, so for now until we can
// be sure its right (if it happens) it just prints debug stuff and processes the old way.
if(!isFill)
{
vifRegs.num -= (size / vSize);
if (!isFill) {
vifRegs.num -= (size / vSize);
}
else
{
else {
int guessedsize = (size / vSize);
guessedsize = vifRegs.num - (((guessedsize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + guessedsize);
@ -164,14 +167,11 @@ _vifT int nVifUnpack(const u8* data) {
if (vif.cl <= vifRegs.cycle.cl) size -= vSize;
else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0;
}
else
{
else {
size -= vSize;
if (vif.cl >= vifRegs.cycle.wl) vif.cl = 0;
}
}
DevCon.Warning("Fill!! Partial num left = %x, guessed %x", vifRegs.num, guessedsize);
}
}
@ -236,8 +236,8 @@ static void setMasks(const vifStruct& vif, const VIFregisters& v) {
template< int idx, bool doMode, bool isFill >
__ri void __fastcall _nVifUnpackLoop(const u8* data) {
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
vifStruct& vif = MTVU_VifX;
VIFregisters& vifRegs = MTVU_VifXRegs;
// skipSize used for skipping writes only
const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;
@ -253,8 +253,8 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
//uint vn = (vif.cmd >> 2) & 0x3;
//uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle
const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ];
const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ];
pxAssume (vif.cl == 0);
pxAssume (vifRegs.cycle.wl > 0);

View File

@ -26,7 +26,6 @@
#include "sVU_Micro.h"
#include "sVU_Debug.h"
#include "sVU_zerorec.h"
#include "Gif.h"
#include "Gif_Unit.h"
using namespace x86Emitter;

View File

@ -32,6 +32,7 @@
#include "GS.h"
#include "Gif.h"
#include "VU.h"
#include "MTVU.h"
#include "R5900.h"
#include "iR5900.h"
@ -456,15 +457,14 @@ void SuperVUReset(int vuindex)
s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
}
// clear the block and any joining blocks
// clear the block and any joining blocks (size given in bytes)
static void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex)
{
vector<VuFunctionHeader::RANGE>::iterator itrange;
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
u32 endpc = startpc + ((size * 4 + 7) & ~7); // Adding this code to ensure size is always a multiple of 8, it can be simplified to startpc+size if size is always a multiple of 8 (cottonvibes)
u32 endpc = startpc + ((size + 7) & ~7); // Ensure size is a multiple of u64 (round up)
while (it != s_listVUHeaders[vuindex].end())
{
// for every fn, check if it has code in the range
for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++)
{
@ -4641,11 +4641,13 @@ void recSuperVU1::Reserve()
void recSuperVU1::Shutdown() throw()
{
vu1Thread.WaitVU();
SuperVUDestroy( 1 );
}
void recSuperVU1::Reset()
{
vu1Thread.WaitVU();
SuperVUReset( 1 );
}