Made several modifications to the MTGS and Threading portions of Pcsx2. This should fix some problems in select games that use VIF FIFO 1 (downloads data from GS to EE, forcing an MTGS stall), and should also fix occasional MTGS freezeups during emulation init/exit. MTGS uses pthread's semaphores now instead of pthread_cond (Which as it turns out isn't really doing what I thought it should do). The semaphores are faster and more stable, so it's a win-win. :)

Programmer notes: Added a new u128 type to PS2Etypes.h, and a new SafeAlignedArray<> class to System.h.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@655 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2009-01-29 17:34:11 +00:00 committed by Gregory Hainaut
parent d55f680bde
commit b32d84516a
19 changed files with 434 additions and 200 deletions

View File

@ -156,6 +156,56 @@ typedef s32 sptr;
#endif
#endif
// A rough-and-ready cross platform 128-bit datatype, Non-SSE style.
#ifdef __cplusplus
struct u128
{
	// The two 64-bit halves of the value; 'lo' is stored first, 'hi' second.
	u64 lo;
	u64 hi;

	// Converting (non-explicit) constructor: a 64-bit source fills the low
	// half and the high half is cleared.
	u128( u64 src ) : lo( src ), hi( 0 ) {}

	// Converting (non-explicit) constructor: a 32-bit source is widened into
	// the low half and the high half is cleared.
	u128( u32 src ) : lo( src ), hi( 0 ) {}
};
struct s128
{
	// The two 64-bit halves of the value; 'lo' is stored first, 'hi' second.
	s64 lo;
	s64 hi;

	// Implicit conversion from s64.
	// The high half is sign-extended (all ones for a negative source) so that
	// the lo/hi pair holds the same two's complement value as 'src'.
	s128( s64 src ) :
		lo( src )
	,	hi( (src < 0) ? -1 : 0 ) {}

	// Implicit conversion from s32.
	// 'src' is sign-extended into 'lo' by the normal integer conversion, and
	// the high half is filled with the sign as well.
	s128( s32 src ) :
		lo( src )
	,	hi( (src < 0) ? -1 : 0 ) {}
};
#else
/* C flavor of the 128-bit datatype.  This must be a struct, not a union: in a
   union 'lo' and 'hi' would share the same 8 bytes, making the type only 64
   bits wide and mismatching the layout of the C++ u128 declared above. */
typedef struct _u128_t
{
	u64 lo;
	u64 hi;
} u128;
/* C flavor of the signed 128-bit datatype.  This must be a struct, not a
   union: in a union 'lo' and 'hi' would share the same 8 bytes, making the
   type only 64 bits wide and mismatching the layout of the C++ s128. */
typedef struct _s128_t
{
	s64 lo;
	s64 hi;
} s128;
#endif
typedef struct {
int size;
s8 *data;

View File

@ -26,14 +26,14 @@ struct _u8bit_128 {
};
struct u128 {
struct u8bit_128 {
_u8bit_128 b8;
};
struct _cacheS {
u32 tag[2];
u128 data[2][4];
u8bit_128 data[2][4];
};
extern _cacheS pCache[64];

View File

@ -104,7 +104,7 @@ namespace Exception
RuntimeError( msg ) {}
};
// This exception exception thrown any time an operation is attempted when an object
// This exception thrown any time an operation is attempted when an object
// is in an uninitialized state.
class InvalidOperation : public LogicError
{
@ -114,6 +114,16 @@ namespace Exception
LogicError( msg ) {}
};
// Keep those array indexers in bounds when using the SafeArray type, or you'll be
// seeing these.
// Parameters:
//   msg - description of the fault; defaults to a generic out-of-bounds message.
class IndexBoundsFault : public LogicError
{
public:
	virtual ~IndexBoundsFault() throw() {}
	explicit IndexBoundsFault( const std::string& msg="Array index is outside the bounds of an array." ) :
		LogicError( msg ) {}
};
class HardwareDeficiency : public RuntimeError
{
public:

View File

@ -120,8 +120,8 @@ void WriteFIFO(u32 mem, const u64 *value) {
if( mtgsThread != NULL )
{
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 16 );
jASSUME( count == 16 );
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 );
jASSUME( count == 1 );
u64* data = (u64*)mtgsThread->GetDataPacketPtr();
data[0] = value[0];
data[1] = value[1];

View File

@ -552,7 +552,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc)
if( mtgsThread != NULL )
{
int sizetoread = (qwc)<<4;
sizetoread = mtgsThread->PrepDataPacket( GIF_PATH_3, pMem, sizetoread );
sizetoread = mtgsThread->PrepDataPacket( GIF_PATH_3, pMem, qwc );
u8* pgsmem = mtgsThread->GetDataPacketPtr();
/* check if page of endmem is valid (dark cloud2) */
@ -579,7 +579,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc)
}
else
#endif
memcpy_aligned(pgsmem, pMem, sizetoread);
memcpy_aligned(pgsmem, pMem, sizetoread<<4);
mtgsThread->SendDataPacket();
}

View File

@ -112,7 +112,9 @@ struct GIFPath
/////////////////////////////////////////////////////////////////////////////
// MTGS Threaded Class Declaration
#define MTGS_RINGBUFFERSIZE 0x00300000 // 3Mb
// Uncomment this to enable the MTGS debug stack, which tracks to ensure reads
// and writes stay synchronized. Warning: the debug stack is VERY slow.
//#define RINGBUF_DEBUG_STACK
enum GIF_PATH
{
@ -143,18 +145,29 @@ enum GS_RINGTYPE
, GS_RINGTYPE_STARTTIME // special case for min==max fps frameskip settings
};
class mtgsThreadObject : public Threading::Thread
{
friend class SaveState;
protected:
// note: when g_pGSRingPos == g_pGSWritePos, the fifo is empty
const u8* m_RingPos; // cur pos gs is reading from
u8* m_WritePos; // cur pos ee thread is writing to
const u8* const m_RingBufferEnd; // pointer to the end of the ringbuffer (used to detect buffer wraps)
// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
// A value of 17 is a 2meg ring buffer. 16 would be 1 meg, and 18 would be 4 megs.
static const uint m_RingBufferSizeFactor = 17;
Threading::WaitEvent m_wait_InitDone; // used to regulate thread startup and gsInit
// size of the ringbuffer in simd128's.
static const uint m_RingBufferSize = 1<<m_RingBufferSizeFactor;
// Mask to apply to ring buffer indices to wrap the pointer from end to
// start (the wrapping is what makes it a ringbuffer, yo!)
static const uint m_RingBufferMask = m_RingBufferSize - 1;
protected:
// note: when g_pGSRingPos == g_pGSWritePos, the fifo is empty
uint m_RingPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
Threading::Semaphore m_post_InitDone; // used to regulate thread startup and gsInit
Threading::MutexLock m_lock_RingRestart;
// Used to delay the sending of events. Performance is better if the ringbuffer
@ -167,20 +180,23 @@ protected:
// Only one data packet can be constructed and uploaded at a time.
uint m_packet_size; // size of the packet (data only, ie. not including the 16 byte command!)
u8* m_packet_data; // pointer to the data location in the ringbuffer.
uint m_packet_ringpos; // index of the data location in the ringbuffer.
#ifdef RINGBUF_DEBUG_STACK
MutexLock m_lock_Stack;
Threading::MutexLock m_lock_Stack;
#endif
// the MTGS "dummy" GIFtag info!
// 16 byte alignment isn't "critical" here, so if GCC ignores the alignment directive
// it shouldn't cause any issues.
PCSX2_ALIGNED16( GIFPath m_path[3] );
// contains aligned memory allocations for gs and Ringbuffer.
SafeAlignedArray<u128,16> m_RingBuffer;
// mtgs needs its own memory space separate from the PS2. The PS2 memory is in
// synch with the EE while this stays in sync with the GS (ie, it lags behind)
PCSX2_ALIGNED16( u8 m_gsMem[0x2000] );
PCSX2_ALIGNED( 4096, u8 m_RingBuffer[MTGS_RINGBUFFERSIZE] );
u8* const m_gsMem;
public:
mtgsThreadObject();
@ -225,8 +241,8 @@ protected:
u32 _gifTransferDummy( GIF_PATH pathidx, const u8 *pMem, u32 size );
// Used internally by SendSimplePacket type functions
const u8* _PrepForSimplePacket();
void _FinishSimplePacket( const u8* future_writepos );
uint _PrepForSimplePacket();
void _FinishSimplePacket( uint future_writepos );
int Callback();
};

View File

@ -49,8 +49,8 @@ using namespace std;
// This allows us to declare the vars as non-volatile and only use
// them as volatile when appropriate (more optimized).
#define volatize(x) (*(u8* volatile*)&(x)) // for writepos
#define volatize_c(x) (*(u8 * volatile*)&(x)) // for readpos
#define volatize(x) (*reinterpret_cast<volatile uint*>(&(x))) // for writepos
//#define volatize_c(x) (*(volatile u32*)&(x)) // for readpos
/////////////////////////////////////////////////////////////////////////////
// BEGIN -- MTGS GIFtag Parse Implementation
@ -164,27 +164,23 @@ static void RegHandlerLABEL(const u32* data)
GSSIGLBLID->LBLID = (GSSIGLBLID->LBLID&~data[1])|(data[0]&data[1]);
}
// END -- MTGS GIFtag Parse Implementation
// END -- MTGS GIFtag Parse Implementation
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// MTGS Threaded Class Implementation
// MTGS Threaded Class Implementation
mtgsThreadObject* mtgsThread = NULL;
// Uncomment this to enable the MTGS debug stack, which tracks to ensure reads
// and writes stay synchronized. Warning: the debug stack is VERY slow.
//#define RINGBUF_DEBUG_STACK
#ifdef RINGBUF_DEBUG_STACK
#include <list>
std::list<uptr> ringposStack;
mutex_t stackLock;
std::list<uint> ringposStack;
#endif
#ifdef _DEBUG
// debug variable used to check for bad code bits where copies are started
// but never closed, or closed without having been started. (GSRingBufCopy calls
// should always be followed by acall to GSRINGBUF_DONECOPY)
// should always be followed by a call to GSRINGBUF_DONECOPY)
static int copyLock = 0;
#endif
@ -192,27 +188,29 @@ typedef void (*GIFRegHandler)(const u32* data);
static GIFRegHandler s_GSHandlers[3] = { RegHandlerSIGNAL, RegHandlerFINISH, RegHandlerLABEL };
mtgsThreadObject::mtgsThreadObject() :
m_RingPos( m_RingBuffer )
, m_WritePos( m_RingBuffer )
, m_RingBufferEnd( m_RingBuffer + sizeof( m_RingBuffer ) )
m_RingPos( 0 )
, m_WritePos( 0 )
, m_wait_InitDone()
, m_post_InitDone()
, m_lock_RingRestart()
, m_CopyCommandTally( 0 )
, m_CopyDataTally( 0 )
, m_RingBufferIsBusy( 0 )
, m_packet_size()
, m_packet_data( NULL )
, m_packet_size( 0 )
, m_packet_ringpos( 0 )
#ifdef RINGBUF_DEBUG_STACK
, m_lock_Stack()
#endif
, m_RingBuffer( m_RingBufferSize + (Ps2MemSize::GSregs/sizeof(u128)) )
, m_gsMem( (u8*)m_RingBuffer.GetPtr( m_RingBufferSize ) )
{
// Wait for the thread to finish initialization (it runs GSinit, which can take
// some time since it's creating a new window and all), and then check for errors.
m_wait_InitDone.Wait();
m_post_event.Post(); // tell MTGS we're done here
m_post_InitDone.Wait(); // and wait for MTGS to be done there!
if( m_returncode != 0 ) // means the thread failed to init the GS plugin
throw Exception::PluginFailure( "GS", "The GS plugin failed to open/initialize." );
@ -233,7 +231,7 @@ void mtgsThreadObject::Reset()
// * Signal a reset.
// * clear the path and byRegs structs (used by GIFtagDummy)
AtomicExchangePointer( m_RingPos, m_WritePos );
AtomicExchange( m_RingPos, m_WritePos );
MTGS_LOG( "MTGS > Sending Reset...\n" );
SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 );
@ -406,14 +404,15 @@ __forceinline u32 mtgsThreadObject::_gifTransferDummy( GIF_PATH pathidx, const u
}
}
// FIXME: dq8, pcsx2 error probably
if(pathidx == 0)
{
if(!path.tag.eop && path.tag.nloop > 0)
{
path.tag.nloop = 0;
DevCon::Write( "path1 hack! " );
// This means that the giftag data got screwly somewhere
// along the way (often means curreg was in a bad state or something)
}
}
#ifdef PCSX2_GSRING_SAMPLING_STATS
@ -426,119 +425,142 @@ __forceinline u32 mtgsThreadObject::_gifTransferDummy( GIF_PATH pathidx, const u
return size;
}
// Overlay for a single command slot in the MTGS ring buffer. The ring buffer
// elements are cast to this type when a command is written or read, so the
// layout (four consecutive u32 values) must not be changed.
struct PacketTagType
{
u32 command;	// packet type: a GS_RINGTYPE value, or (pathidx+1) for data packets
u32 data[3];	// command-specific arguments (data[0] holds the packet size for data packets)
};
int mtgsThreadObject::Callback()
{
Console::WriteLn("MTGS > Thread Started, Opening GS Plugin...");
// Wait for the MTGS to initialize structures.
m_post_event.Wait();
memcpy_aligned( m_gsMem, PS2MEM_GS, sizeof(m_gsMem) );
GSsetBaseMem( m_gsMem );
m_returncode = GSopen((void *)&pDsp, "PCSX2", 1);
GSCSRr = 0x551B400F; // 0x55190000
m_wait_InitDone.Set();
m_post_InitDone.Post();
if (m_returncode != 0) { return m_returncode; } // error msg will be issued to the user by Plugins.c
Console::WriteLn("MTGS > GSopen Finished.");
#ifdef RINGBUF_DEBUG_STACK
u32 prevCmd=0;
PacketTagType prevCmd;
#endif
while( !m_sigterm )
{
m_wait_event.Wait();
m_post_event.Wait();
//if( m_sigterm ) break;
AtomicExchange( m_RingBufferIsBusy, 1 );
// note: m_RingPos is intentionally not volatile, because it should only
// ever be modified by this thread.
while( m_RingPos != volatize(m_WritePos))
{
assert( m_RingPos < m_RingBufferEnd );
assert( m_RingPos < m_RingBufferSize );
u32 tag = *(u32*)m_RingPos;
u32 ringposinc = 16;
const PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_RingPos];
u32 ringposinc = 1;
#ifdef RINGBUF_DEBUG_STACK
// pop a ringpos off the stack. It should match this one!
EnterCriticalSection( &stackLock );
m_lock_Stack.Lock();
uptr stackpos = ringposStack.back();
if( stackpos != (uptr)m_RingPos )
if( stackpos != m_RingPos )
{
Console::Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, (long)m_RingPos, prevCmd );
Console::Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", params stackpos, m_RingPos, prevCmd.command );
}
assert( stackpos == (long)m_RingPos );
assert( stackpos == m_RingPos );
prevCmd = tag;
ringposStack.pop_back();
LeaveCriticalSection( &stackLock );
m_lock_Stack.Unlock();
#endif
switch( tag&0xffff )
switch( tag.command )
{
case GS_RINGTYPE_RESTART:
AtomicExchangePointer(m_RingPos, m_RingBuffer);
AtomicExchange(m_RingPos, 0);
// stall for a bit to let the MainThread have time to update the g_pGSWritePos.
m_lock_RingRestart.Lock();
m_lock_RingRestart.Unlock();
continue;
continue;
case GS_RINGTYPE_P1:
{
int qsize = (tag>>16);
const int qsize = tag.data[0];
const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 );
// make sure that tag>>16 is the MAX size readable
GSgifTransfer1((u32*)(m_RingPos+16) - 0x1000 + 4*qsize, 0x4000-qsize*16);
ringposinc += qsize<<4;
break;
//GSgifTransfer1(((u32*)data) - 0x1000 + 4*qsize, 0x4000-qsize*16);
GSgifTransfer1((u32*)(data - 0x400 + qsize), 0x4000-qsize*16);
ringposinc += qsize;
}
break;
case GS_RINGTYPE_P2:
GSgifTransfer2((u32*)(m_RingPos+16), tag>>16);
ringposinc += (tag>>16)<<4;
break;
{
const int qsize = tag.data[0];
const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 );
GSgifTransfer2((u32*)data, qsize);
ringposinc += qsize;
}
break;
case GS_RINGTYPE_P3:
GSgifTransfer3((u32*)(m_RingPos+16), tag>>16);
ringposinc += (tag>>16)<<4;
break;
{
const int qsize = tag.data[0];
const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 );
GSgifTransfer3((u32*)data, qsize);
ringposinc += qsize;
}
break;
case GS_RINGTYPE_VSYNC:
{
GSvsync(*(u32*)(m_RingPos+4));
GSvsync(tag.data[0]);
gsFrameSkip( !( *(u32*)(m_RingPos+8) ) );
gsFrameSkip( !tag.data[1] );
if( PAD1update != NULL ) PAD1update(0);
if( PAD2update != NULL ) PAD2update(1);
break;
}
break;
case GS_RINGTYPE_FRAMESKIP:
_gs_ResetFrameskip();
break;
break;
case GS_RINGTYPE_MEMWRITE8:
m_gsMem[*(u32*)(m_RingPos+4)] = *(u8*)(m_RingPos+8);
break;
m_gsMem[tag.data[0]] = (u8)tag.data[1];
break;
case GS_RINGTYPE_MEMWRITE16:
*(u16*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u16*)(m_RingPos+8);
break;
*(u16*)(m_gsMem+tag.data[0]) = (u16)tag.data[1];
break;
case GS_RINGTYPE_MEMWRITE32:
*(u32*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u32*)(m_RingPos+8);
break;
*(u32*)(m_gsMem+tag.data[0]) = tag.data[1];
break;
case GS_RINGTYPE_MEMWRITE64:
*(u64*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u64*)(m_RingPos+8);
break;
*(u64*)(m_gsMem+tag.data[0]) = *(u64*)&tag.data[1];
break;
case GS_RINGTYPE_FREEZE:
{
//SaveState* f = (SaveState*)(*(uptr*)(m_RingPos+8));
freezeData* data = (freezeData*)(*(uptr*)(m_RingPos+8));
int mode = *(s32*)(m_RingPos+4);
freezeData* data = (freezeData*)(*(uptr*)&tag.data[1]);
int mode = tag.data[0];
GSfreeze( mode, data );
break;
}
case GS_RINGTYPE_RECORD:
{
int record = *(u32*)(m_RingPos+4);
int record = tag.data[0];
if( GSsetupRecording != NULL ) GSsetupRecording(record, NULL);
if( SPU2setupRecording != NULL ) SPU2setupRecording(record, NULL);
break;
@ -551,27 +573,27 @@ int mtgsThreadObject::Callback()
case GS_RINGTYPE_SOFTRESET:
{
int mask = *(u32*)(m_RingPos+4);
int mask = tag.data[0];
MTGS_LOG( "MTGS > Receiving GIF Soft Reset (mask: %d)\n", mask );
GSgifSoftReset( mask );
break;
}
case GS_RINGTYPE_WRITECSR:
GSwriteCSR( *(u32*)(m_RingPos+4) );
GSwriteCSR( tag.data[0] );
break;
case GS_RINGTYPE_MODECHANGE:
_gs_ChangeTimings( *(u32*)(m_RingPos+4), *(u32*)(m_RingPos+8) );
_gs_ChangeTimings( tag.data[0], tag.data[1] );
break;
case GS_RINGTYPE_STARTTIME:
m_iSlowStart += *(u32*)(m_RingPos+4);
m_iSlowStart += tag.data[0];
break;
#ifdef PCSX2_DEVBUILD
default:
Console::Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", params tag, m_RingPos, m_WritePos);
Console::Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", params tag.command, m_RingPos, m_WritePos);
assert(0);
m_RingPos = m_WritePos;
continue;
@ -581,12 +603,10 @@ int mtgsThreadObject::Callback()
#endif
}
const u8* newringpos = m_RingPos + ringposinc;
assert( newringpos <= m_RingBufferEnd );
if( newringpos == m_RingBufferEnd )
newringpos = m_RingBuffer;
AtomicExchangePointer( m_RingPos, newringpos );
uint newringpos = m_RingPos + ringposinc;
assert( newringpos <= m_RingBufferSize );
newringpos &= m_RingBufferMask;
AtomicExchange( m_RingPos, newringpos );
}
AtomicExchange( m_RingBufferIsBusy, 0 );
}
@ -616,7 +636,7 @@ void mtgsThreadObject::WaitGS()
// For use in loops that wait on the GS thread to do certain things.
void mtgsThreadObject::SetEvent()
{
m_wait_event.Set();
m_post_event.Post();
m_CopyCommandTally = 0;
m_CopyDataTally = 0;
}
@ -635,30 +655,28 @@ void mtgsThreadObject::SetEventWait()
u8* mtgsThreadObject::GetDataPacketPtr() const
{
return m_packet_data;
return (u8*)m_RingBuffer.GetPtr( m_packet_ringpos );
}
// Closes the data packet send command, and initiates the gs thread (if needed).
void mtgsThreadObject::SendDataPacket()
{
// make sure a previous copy block has been started somewhere.
jASSUME( m_packet_data != NULL );
jASSUME( m_packet_size != 0 );
const u8* temp = m_packet_data + m_packet_size;
jASSUME( temp <= m_RingBufferEnd );
if( temp == m_RingBufferEnd )
temp = m_RingBuffer;
uint temp = m_packet_ringpos + m_packet_size;
jASSUME( temp <= m_RingBufferSize );
temp &= m_RingBufferMask;
#ifdef _DEBUG
else
if( m_packet_ringpos + m_packet_size < m_RingBufferSize )
{
const u8* readpos = volatize(m_RingPos);
uint readpos = volatize(m_RingPos);
if( readpos != m_WritePos )
{
// The writepos should never leapfrog the readpos
// since that indicates a bad write.
if( m_packet_data < readpos )
if( m_packet_ringpos < readpos )
assert( temp < readpos );
}
@ -669,9 +687,9 @@ void mtgsThreadObject::SendDataPacket()
}
#endif
AtomicExchangePointer( m_WritePos, temp );
AtomicExchange( m_WritePos, temp );
m_packet_data = NULL;
m_packet_size = 0;
if( m_RingBufferIsBusy ) return;
@ -689,7 +707,7 @@ void mtgsThreadObject::SendDataPacket()
// 8 - roughly 2% slower on HT machines.
m_CopyDataTally += m_packet_size;
if( ( m_CopyDataTally > 0x40000 ) || ( ++m_CopyCommandTally > 16 ) )
if( ( m_CopyDataTally > 0x4000 ) || ( ++m_CopyCommandTally > 16 ) )
{
FreezeXMMRegs(1);
FreezeMMXRegs(1);
@ -727,6 +745,8 @@ static u32 GSRingBufCopySz = 0;
// returns the amount of giftag data not processed (in simd128 values).
// Return value is used by VU1 XGKICK to hack-fix data packets which are too
// large for VU1 memory.
// Parameters:
// size - size of the packet data, in simd128's
int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
{
#ifdef PCSX2_GSRING_TX_STATS
@ -777,34 +797,31 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
// interlocked exchanges when we modify it, however, since the GS thread
// is reading it.
const u8 *writepos = m_WritePos;
uint writepos = m_WritePos;
// Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY
jASSUME( m_packet_data == NULL );
jASSUME( m_packet_size == 0 );
// Sanity checks! (within the confines of our ringbuffer please!)
jASSUME( size < MTGS_RINGBUFFERSIZE );
jASSUME( writepos < m_RingBufferEnd );
// Alignment checks! (16 bytes please!)
jASSUME( ((uptr)writepos & 15) == 0 );
//jASSUME( (size&15) == 0);
jASSUME( size < m_RingBufferSize );
jASSUME( writepos < m_RingBufferSize );
//fixme: Vif sometimes screws up and size is unaligned, try this then (rama)
if( (size&15) != 0){
// Is this still a problem? It should be fixed on the specific VIF command now. (air)
/*if( (size&15) != 0){
Console::Error( "MTGS problem, size unaligned");
size = (size+15)&(~15);
}
}*/
// retval has the amount of data *not* processed, so we only need to reserve
// enough room for size - retval:
int retval = _gifTransferDummy( pathidx, srcdata, size>>4 );
int retval = _gifTransferDummy( pathidx, srcdata, size );
size = size - (retval<<4);
size = size - retval;
m_packet_size = size;
size += 16; // takes into account our command qword.
size++; // takes into account our command qword.
if( writepos + size < m_RingBufferEnd )
if( writepos + size < m_RingBufferSize )
{
// generic gs wait/stall.
// Waits until the readpos is outside the scope of the write area.
@ -812,7 +829,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
{
// two conditionals in the following while() loop, so precache
// the readpos for more efficient behavior:
const u8* readpos = volatize_c(m_RingPos);
uint readpos = volatize(m_RingPos);
// if the writepos is past the readpos then we're safe:
if( writepos >= readpos ) break;
@ -824,7 +841,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
SetEventWait();
}
}
else if( writepos + size > m_RingBufferEnd )
else if( writepos + size > m_RingBufferSize )
{
// If the incoming packet doesn't fit, then start over from
// the start of the ring buffer (it's a lot easier than trying
@ -836,7 +853,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
while( true )
{
const u8* readpos = volatize(m_RingPos);
uint readpos = volatize(m_RingPos);
// is the buffer empty?
if( readpos == writepos ) break;
@ -844,22 +861,22 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
// Also: Wait for the readpos to go past the start of the buffer
// Otherwise it'll stop dead in its tracks when we set the new write
// position below (bad!)
if( readpos < writepos && readpos != m_RingBuffer ) break;
if( readpos < writepos && readpos != 0 ) break;
SetEventWait();
}
m_lock_RingRestart.Lock();
SendSimplePacket( GS_RINGTYPE_RESTART, 0, 0, 0 );
writepos = m_RingBuffer;
AtomicExchangePointer( m_WritePos, writepos );
writepos = 0;
AtomicExchange( m_WritePos, writepos );
m_lock_RingRestart.Unlock();
// stall until the read position is past the end of our incoming block,
// or until it reaches the current write position (signals an empty buffer).
while( true )
{
const u8* readpos = volatize(m_RingPos);
uint readpos = volatize(m_RingPos);
if( readpos == m_WritePos ) break;
if( writepos+size < readpos ) break;
@ -874,48 +891,48 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s
//SysPrintf( "MTGS > Perfect Fit!\n");
while( true )
{
const u8* readpos = volatize(m_RingPos);
uint readpos = volatize(m_RingPos);
// is the buffer empty? Don't wait...
if( readpos == writepos ) break;
// Copy is ready so long as readpos is less than writepos and *not*
// equal to the base of the ringbuffer (otherwise the buffer will stop)
if( readpos < writepos && readpos != m_RingBuffer ) break;
if( readpos < writepos && readpos != 0 ) break;
SetEventWait();
}
}
#ifdef RINGBUF_DEBUG_STACK
mutex_lock( stackLock );
ringposStack.push_front( (uptr)writepos );
mutex_unlock( stackLock );
m_lock_Stack.Lock();
ringposStack.push_front( writepos );
m_lock_Stack.Unlock();
#endif
// Command qword: Low word is the command, and the high word is the packet
// length in SIMDs (128 bits).
const uint simd_size = (m_packet_size>>4); // minus the command byte!
*(u32*)m_WritePos = (pathidx+1) | (simd_size<<16);
m_packet_data = m_WritePos + 16;
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
tag.command = pathidx+1;
tag.data[0] = m_packet_size;
m_packet_ringpos = m_WritePos + 1;
return m_packet_size;
}
__forceinline const u8* mtgsThreadObject::_PrepForSimplePacket()
__forceinline uint mtgsThreadObject::_PrepForSimplePacket()
{
#ifdef RINGBUF_DEBUG_STACK
m_lock_Stack.Lock();
ringposStack.push_front( (uptr)m_WritePos );
ringposStack.push_front( m_WritePos );
m_lock_Stack.Unlock();
#endif
const u8* future_writepos = m_WritePos+16;
jASSUME( future_writepos <= m_RingBufferEnd );
uint future_writepos = m_WritePos+1;
jASSUME( future_writepos <= m_RingBufferSize );
if( future_writepos >= m_RingBufferEnd )
future_writepos = m_RingBuffer;
future_writepos &= m_RingBufferMask;
while( future_writepos == volatize(m_RingPos) )
SetEventWait();
@ -923,31 +940,33 @@ __forceinline const u8* mtgsThreadObject::_PrepForSimplePacket()
return future_writepos;
}
__forceinline void mtgsThreadObject::_FinishSimplePacket( const u8* future_writepos )
__forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos )
{
assert( future_writepos != volatize(m_RingPos) );
AtomicExchangePointer( m_WritePos, future_writepos );
AtomicExchange( m_WritePos, future_writepos );
}
void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 )
{
const u8* const thefuture = _PrepForSimplePacket();
const uint thefuture = _PrepForSimplePacket();
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
*(u32*)m_WritePos = type;
*(u32*)(m_WritePos+4) = data0;
*(u32*)(m_WritePos+8) = data1;
*(u32*)(m_WritePos+12) = data2;
tag.command = type;
tag.data[0] = data0;
tag.data[1] = data1;
tag.data[2] = data2;
_FinishSimplePacket( thefuture );
}
void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 )
{
const u8* const thefuture = _PrepForSimplePacket();
const uint thefuture = _PrepForSimplePacket();
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
*(u32*)m_WritePos = type;
*(u32*)(m_WritePos+4) = data0;
*(uptr*)(m_WritePos+8) = (uptr)data1;
tag.command = type;
tag.data[0] = data0;
*(uptr*)&tag.data[1] = (uptr)data1;
_FinishSimplePacket( thefuture );
}
@ -1004,4 +1023,4 @@ void mtgsThreadObject::Freeze( SaveState& state )
void mtgsRingBufSimplePacket( s32 command, u32 data0, u32 data1, u32 data2 )
{
mtgsThread->SendSimplePacket( (GS_RINGTYPE)command, data0, data1, data2 );
}
}

View File

@ -174,6 +174,9 @@ void memMapVUmicro()
{
vtlb_MapHandler(vu0_micro_mem[CHECK_VU0REC ? 0 : 1],0x11000000,0x00004000);
vtlb_MapHandler(vu1_micro_mem[CHECK_VU1REC ? 0 : 1],0x11008000,0x00004000);
vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000);
vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000);
}
void memMapPhy()
@ -193,9 +196,6 @@ void memMapPhy()
//IOP mem
vtlb_MapBlock(psxM,0x1c000000,0x00800000);
vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000);
vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000);
//These fallback to mem* stuff ...
vtlb_MapHandler(tlb_fallback_1,0x10000000,0x10000);
vtlb_MapHandler(tlb_fallback_6,0x12000000,0x10000);
@ -455,11 +455,22 @@ void __fastcall _ext_memWrite128(u32 mem, const u64 *value)
typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc );
template<int vunum, bool dynarec>
static __forceinline ClearFunc_t& GetClearFunc()
static __forceinline void ClearVuFunc( u32 addr, u32 size )
{
return dynarec ?
(( vunum==0 ) ? VU0micro::recClear : VU1micro::recClear)
: (( vunum==0 ) ? VU0micro::intClear : VU1micro::intClear);
if( dynarec )
{
if( vunum==0 )
VU0micro::recClear(addr,size);
else
VU1micro::recClear(addr,size);
}
else
{
if( vunum==0 )
VU0micro::intClear(addr,size);
else
VU1micro::intClear(addr,size);
}
}
template<int vunum>
@ -521,7 +532,7 @@ void __fastcall vuMicroWrite8(u32 addr,mem8_t data)
{
vu.Micro[addr]=data;
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
ClearVuFunc<vunum, dynrec>(addr&(~7),1);
}
}
@ -535,7 +546,7 @@ void __fastcall vuMicroWrite16(u32 addr,mem16_t data)
{
*(u16*)&vu.Micro[addr]=data;
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
ClearVuFunc<vunum, dynrec>(addr&(~7),1);
}
}
@ -549,7 +560,7 @@ void __fastcall vuMicroWrite32(u32 addr,mem32_t data)
{
*(u32*)&vu.Micro[addr]=data;
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
ClearVuFunc<vunum, dynrec>(addr&(~7),1);
}
}
@ -563,7 +574,7 @@ void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data)
{
*(u64*)&vu.Micro[addr]=data[0];
GetClearFunc<vunum, dynrec>()(addr,1);
ClearVuFunc<vunum, dynrec>(addr,1);
}
}
@ -578,7 +589,7 @@ void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
*(u64*)&vu.Micro[addr]=data[0];
*(u64*)&vu.Micro[addr+8]=data[1];
GetClearFunc<vunum, dynrec>()(addr,2);
ClearVuFunc<vunum, dynrec>(addr,2);
}
}
@ -696,7 +707,6 @@ void memReset()
vtlb_Init();
tlb_fallback_0=vtlb_RegisterHandlerTempl1(_ext_mem,0);
//tlb_fallback_1=vtlb_RegisterHandlerTempl1(_ext_mem,1);
tlb_fallback_2=vtlb_RegisterHandlerTempl1(_ext_mem,2);
tlb_fallback_3=vtlb_RegisterHandlerTempl1(_ext_mem,3);
tlb_fallback_4=vtlb_RegisterHandlerTempl1(_ext_mem,4);

View File

@ -33,14 +33,16 @@ namespace Ps2MemSize
{
static const uint Base = 0x02000000; // 32 MB main memory!
static const uint Rom = 0x00400000; // 4 MB main rom
static const uint Rom1 = 0x00040000; // fixme - TLB allocates 0x00080000 ?
static const uint Rom2 = 0x00080000;
static const uint ERom = 0x001C0000;
static const uint Rom1 = 0x00040000; // DVD player
static const uint Rom2 = 0x00080000; // Chinese rom extension (?)
static const uint ERom = 0x001C0000; // DVD player extensions (?)
static const uint Hardware = 0x00010000;
static const uint Scratch = 0x00004000; // fixme - VM allocates 0x10000 ?
static const uint Scratch = 0x00004000;
static const uint IopRam = 0x200000; // 2MB main ram on the IOP.
static const uint IopRam = 0x00200000; // 2MB main ram on the IOP.
static const uint IopHardware = 0x00010000;
static const uint GSregs = 0x00002000; // 8k for the GS registers and stuff.
}
#ifdef PCSX2_VIRTUAL_MEM

View File

@ -622,14 +622,28 @@ void ShutdownPlugins()
OpenStatus.GS = false;
}
GSshutdown();
PAD1shutdown();
PAD2shutdown();
SPU2shutdown();
CDVDshutdown();
DEV9shutdown();
USBshutdown();
FWshutdown();
if( GSshutdown != NULL )
GSshutdown();
if( PAD1shutdown != NULL )
PAD1shutdown();
if( PAD2shutdown != NULL )
PAD2shutdown();
if( SPU2shutdown != NULL )
SPU2shutdown();
if( CDVDshutdown != NULL )
CDVDshutdown();
if( DEV9shutdown != NULL )
DEV9shutdown();
if( USBshutdown != NULL )
USBshutdown();
if( FWshutdown != NULL )
FWshutdown();
}
int LoadPlugins() {
@ -867,4 +881,4 @@ void PluginsResetGS()
int ret = GSinit();
if (ret != 0) { Msgbox::Alert("GSinit error: %d", params ret); }
}
}

View File

@ -283,6 +283,25 @@ protected:
int m_size; // size of the allocation of memory
const static std::string m_str_Unnamed;
protected:
// Internal constructor for use by derived classes. This allows a derived class to
// use its own memory allocation (with an aligned memory, for example).
// Parameters:
//   name          - name given to this allocation.
//   allocated_mem - memory block already allocated by the derived class.
//   initSize      - stored as the size of the allocation.
// Throws:
// Exception::OutOfMemory if the allocated_mem pointer is NULL.
explicit MemoryAlloc( const std::string& name, T* allocated_mem, int initSize ) :
Name( name )
, ChunkSize( DefaultChunkSize )
, m_ptr( allocated_mem )
, m_size( initSize )
{
// A NULL pointer here means the derived class's allocation failed.
if( m_ptr == NULL )
throw Exception::OutOfMemory();
}
// Reallocation hook; virtual so that a derived class can substitute its own
// reallocation routine. This default version uses plain realloc().
// Parameters:
//   newsize - new size of the allocation, in elements of T (not bytes).
// Returns the pointer handed back by realloc(), which may be NULL.
virtual T* _virtual_realloc( int newsize )
{
return (T*)realloc( m_ptr, newsize * sizeof(T) );
}
public:
virtual ~MemoryAlloc()
@ -322,7 +341,7 @@ public:
if( blockSize > m_size )
{
const uint newalloc = blockSize + ChunkSize;
m_ptr = (T*)realloc( m_ptr, newalloc * sizeof(T) );
m_ptr = _virtual_realloc( newalloc );
if( m_ptr == NULL )
{
throw Exception::OutOfMemory(
@ -353,19 +372,69 @@ public:
}
protected:
// A safe array index fetcher. Throws an exception if the array index
// is outside the bounds of the array.
// Performance Considerations: This function adds quite a bit of overhead
// to array indexing and thus should be done infrequently if used in
// time-critical situations. Instead of using it from inside loops, cache
// the pointer into a local variable and use standard (unsafe) C indexes.
T* _getPtr( uint i ) const
{
#ifdef PCSX2_DEVBUILD
if( i >= (uint)m_size )
{
throw std::out_of_range(
throw Exception::IndexBoundsFault(
"Index out of bounds on MemoryAlloc: " + Name +
" (index=" + to_string(i) +
", size=" + to_string(m_size) + ")"
);
}
#endif
return &m_ptr[i];
}
};
// A MemoryAlloc whose storage is obtained from the aligned allocation routines
// (_aligned_malloc / _aligned_realloc), using 'Alignment' as the alignment
// argument.
//
// Note: members inherited from MemoryAlloc<T> are accessed through 'this->' and
// the base class is named with its template argument, because names from a
// dependent base class are not found by unqualified lookup on standards
// conforming compilers.
template< typename T, uint Alignment >
class SafeAlignedArray : public MemoryAlloc<T>
{
protected:
	// Aligned replacement for the base class's reallocation routine.
	// Parameters:
	//   newsize - new size of the allocation, in elements of T (not bytes).
	T* _virtual_realloc( int newsize )
	{
		// TODO : aligned_realloc will need a linux implementation now. -_-
		return (T*)_aligned_realloc( this->m_ptr, newsize * sizeof(T), Alignment );
	}

	// Appends "(align:xx)" to the name of the allocation in devel builds.
	// Maybe useful, maybe not... no harm in attaching it. :D
	// Static because it is called from the constructor's initializer list,
	// before the base class has been constructed.
	static std::string _getName( const std::string& src )
	{
#ifdef PCSX2_DEVBUILD
		return src + "(align:" + to_string(Alignment) + ")";
#else
		return src;
#endif
	}

public:
	virtual ~SafeAlignedArray()
	{
		safe_aligned_free( this->m_ptr );
		// m_ptr is set to null, so the parent class's destructor won't re-free it.
	}

	// Creates an empty array; all setup is left to the base class.
	explicit SafeAlignedArray( const std::string& name="Unnamed" ) :
		MemoryAlloc<T>( name )
	{
	}

	// Creates an array of initialSize elements using aligned memory.
	// Throws:
	//   Exception::OutOfMemory (from the base class constructor) if the
	//   aligned allocation returns NULL.
	explicit SafeAlignedArray( int initialSize, const std::string& name="Unnamed" ) :
		MemoryAlloc<T>(
			_getName(name),
			(T*)_aligned_malloc( initialSize * sizeof(T), Alignment ),
			initialSize
		)
	{
	}
};
#endif /* __SYSTEM_H__ */

View File

@ -28,7 +28,7 @@ namespace Threading
, m_returncode( 0 )
, m_terminated( false )
, m_sigterm( 0 )
, m_wait_event()
, m_post_event()
{
if( pthread_create( &m_thread, NULL, _internal_callback, this ) != 0 )
throw Exception::ThreadCreationError();
@ -42,7 +42,7 @@ namespace Threading
// Asks the thread to exit and waits for it to finish:
//   1. stores 1 into m_sigterm through AtomicExchange,
//   2. signals the thread's event object,
//   3. blocks in pthread_join() until the thread has ended.
void Thread::Close()
{
AtomicExchange( m_sigterm, 1 );
// NOTE(review): both the WaitEvent call (m_wait_event.Set) and the Semaphore
// call (m_post_event.Post) appear here.  This looks like diff residue where the
// second line replaces the first -- confirm which one is live in the real file.
m_wait_event.Set();
m_post_event.Post();
// The thread's exit value is not collected here (second argument is NULL).
pthread_join( m_thread, NULL );
}
@ -82,6 +82,38 @@ namespace Threading
pthread_mutex_unlock( &mutex );
}
// Initializes the wrapped sem_t with a starting count of zero and a `pshared`
// argument of false.  The result of sem_init() is not inspected.
Semaphore::Semaphore()
{
    const int pshared = false;
    const unsigned int initialCount = 0;
    sem_init( &sema, pshared, initialCount );
}
// Destroys the wrapped sem_t.  The result of sem_destroy() is not inspected.
Semaphore::~Semaphore()
{
    sem_t* const handle = &sema;
    sem_destroy( handle );
}
// Posts to the semaphore once via sem_post().  The result is not inspected.
void Semaphore::Post()
{
    sem_t* const handle = &sema;
    sem_post( handle );
}
// Posts to the semaphore `multiple` times in one call by forwarding to
// sem_post_multiple().  The result is not inspected.
// NOTE(review): sem_post_multiple is not a POSIX function (it is an extension
// shipped with pthreads-win32) -- confirm availability on other platforms.
void Semaphore::Post( int multiple )
{
    sem_t* const handle = &sema;
    sem_post_multiple( handle, multiple );
}
// Blocks the calling thread until the semaphore has been acquired via
// sem_wait().
//
// sem_wait() is allowed to return -1 with errno == EINTR when a signal handler
// interrupts the call; in that case the semaphore has NOT been acquired.  The
// wait is therefore retried so that callers never see such a spurious wakeup.
// Any other failure still ends the wait silently, as before.
void Semaphore::Wait()
{
    while( sem_wait( &sema ) == -1 && errno == EINTR )
    {
        // Interrupted before the semaphore was acquired: wait again.
    }
}
// Returns the semaphore's current value as reported by sem_getvalue().
// The result starts out as 0 so that a failing sem_getvalue() (which leaves its
// output argument untouched) yields 0 rather than an uninitialized value.
int Semaphore::Count()
{
    int retval = 0;
    sem_getvalue( &sema, &retval );
    return retval;
}
MutexLock::MutexLock()
{
int err = 0;

View File

@ -20,6 +20,7 @@
#define _THREADING_H_
#include <errno.h> // EBUSY
#include <semaphore.h>
#include "PS2Etypes.h"
#include "Exceptions.h"
@ -41,6 +42,19 @@ namespace Threading
void Wait();
};
// Thin wrapper around an unnamed semaphore (sem_t) from <semaphore.h>.
// The semaphore is initialized by the constructor and destroyed by the
// destructor.
struct Semaphore
{
    sem_t sema;             // the underlying semaphore object.

    Semaphore();
    ~Semaphore();

    void Post();                // posts to the semaphore once.
    void Post( int multiple );  // posts to the semaphore `multiple` times.
    void Wait();                // waits on the semaphore.
    int Count();                // current semaphore value.

private:
    // Copying is disabled (declared, intentionally never defined): a copy would
    // duplicate the raw sem_t, and both copies' destructors would then destroy
    // it.  Referring to a copy of a sem_t is undefined as far as the semaphore
    // API is concerned.
    Semaphore( const Semaphore& );
    Semaphore& operator=( const Semaphore& );
};
struct MutexLock
{
pthread_mutex_t mutex;
@ -70,7 +84,7 @@ namespace Threading
int m_returncode; // value returned from the thread on close.
bool m_terminated; // set true after the thread has been closed.
u32 m_sigterm; // set to true(1) when the thread has been requested to exit.
WaitEvent m_wait_event; // general wait event that's needed by most threads.
Semaphore m_post_event; // general wait event that's needed by most threads.
public:
virtual ~Thread();

View File

@ -1587,8 +1587,8 @@ static int Vif1TransDirectHL(u32 *data){
{
// copy 16 bytes the fast way:
const u64* src = (u64*)splittransfer[0];
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, src, 16);
jASSUME( count == 16 );
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, src, 1);
jASSUME( count == 1 );
u64* dst = (u64*)mtgsThread->GetDataPacketPtr();
dst[0] = src[0];
dst[1] = src[1];
@ -1633,8 +1633,9 @@ static int Vif1TransDirectHL(u32 *data){
if( mtgsThread != NULL )
{
//unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :)
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, data, ret<<2 );
memcpy_fast( mtgsThread->GetDataPacketPtr(), data, count );
// Round ret up, just in case it's not 128bit aligned.
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, data, (ret+3)>>2 );
memcpy_fast( mtgsThread->GetDataPacketPtr(), data, count<<4 );
mtgsThread->SendDataPacket();
}
else {

View File

@ -681,7 +681,6 @@ void recBC1T( void ) {
SaveBranchState();
recompileNextInstruction(1);
SetBranchImm(branchTo);
//j32Ptr[1] = JMP32(0);
x86SetJ32(j32Ptr[0]);
@ -691,7 +690,6 @@ void recBC1T( void ) {
recompileNextInstruction(1);
SetBranchImm(pc);
//x86SetJ32(j32Ptr[1]);
}
void recBC1FL( void ) {

View File

@ -45,7 +45,7 @@ namespace VU0micro
{
SuperVUReset(0);
// these shouldn't be needed, but shouldn't hurt anythign either.
// these shouldn't be needed, but shouldn't hurt anything either.
x86FpuState = FPU_STATE;
iCWstate = 0;
}

View File

@ -1970,15 +1970,14 @@ void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
// Chances are this should be a "loops around memory" situation, and the packet
// should be continued starting at addr zero (0).
size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)));
//size = 0x4000-(size<<4)-(addr&0x3fff);
size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4);
jASSUME( size > 0 );
//if( size > 0 )
{
u8* pmem = mtgsThread->GetDataPacketPtr();
memcpy_aligned(pmem, (u8*)pMem+addr, size);
memcpy_aligned(pmem, (u8*)pMem+addr, size<<4);
mtgsThread->SendDataPacket();
}
}
//------------------------------------------------------------------
//------------------------------------------------------------------

View File

@ -443,7 +443,7 @@ void SuperVUReset(int vuindex)
}
// clear the block and any joining blocks
__forceinline void SuperVUClear(u32 startpc, u32 size, int vuindex)
void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex)
{
vector<VuFunctionHeader::RANGE>::iterator itrange;
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();

View File

@ -23,7 +23,7 @@
#include "iVUmicro.h"
extern void SuperVUAlloc(int vuindex); // global VU resources aare automatically allocated if necessary.
extern void SuperVUAlloc(int vuindex); // global VU resources are automatically allocated if necessary.
extern void SuperVUDestroy(int vuindex); // if vuindex is -1, destroys everything
extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything
@ -37,7 +37,7 @@ extern void svudispfntemp();
#ifdef __LINUX__
}
#endif
extern void SuperVUClear(u32 startpc, u32 size, int vuindex);
extern void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex);
// read = 0, will write to reg
// read = 1, will read from reg