diff --git a/common/PS2Etypes.h b/common/PS2Etypes.h index d980530d66..4c62b47f0f 100644 --- a/common/PS2Etypes.h +++ b/common/PS2Etypes.h @@ -156,6 +156,56 @@ typedef s32 sptr; #endif #endif +// A rough-and-ready cross platform 128-bit datatype, Non-SSE style. +#ifdef __cplusplus +struct u128 +{ + u64 lo; + u64 hi; + + // Implicit conversion from u64 + u128( u64 src ) : + lo( src ) + , hi( 0 ) {} + + // Implicit conversion from u32 + u128( u32 src ) : + lo( src ) + , hi( 0 ) {} +}; + +struct s128 +{ + s64 lo; + s64 hi; + + // Implicit conversion from s64 (sign-extends into hi) + s128( s64 src ) : + lo( src ) + , hi( src < 0 ? -1 : 0 ) {} + + // Implicit conversion from s32 (sign-extends into hi) + s128( s32 src ) : + lo( src ) + , hi( src < 0 ? -1 : 0 ) {} +}; + +#else // plain C: these must be structs (a union would overlay lo and hi in one 64-bit slot) + +typedef struct _u128_t +{ + u64 lo; + u64 hi; +} u128; + +typedef struct _s128_t +{ + s64 lo; + s64 hi; +} s128; + +#endif + typedef struct { int size; s8 *data; diff --git a/pcsx2/Cache.h b/pcsx2/Cache.h index da4e542335..62d513430a 100644 --- a/pcsx2/Cache.h +++ b/pcsx2/Cache.h @@ -26,14 +26,14 @@ struct _u8bit_128 { }; -struct u128 { +struct u8bit_128 { _u8bit_128 b8; }; struct _cacheS { u32 tag[2]; - u128 data[2][4]; + u8bit_128 data[2][4]; }; extern _cacheS pCache[64]; diff --git a/pcsx2/Exceptions.h b/pcsx2/Exceptions.h index 411500f0ce..bd1a66007f 100644 --- a/pcsx2/Exceptions.h +++ b/pcsx2/Exceptions.h @@ -104,7 +104,7 @@ namespace Exception RuntimeError( msg ) {} }; - // This exception exception thrown any time an operation is attempted when an object + // This exception is thrown any time an operation is attempted when an object // is in an uninitialized state. class InvalidOperation : public LogicError { @@ -114,6 +114,16 @@ namespace Exception LogicError( msg ) {} }; + // Keep those array indexers in bounds when using the SafeArray type, or you'll be + // seeing these. 
+ class IndexBoundsFault : public LogicError + { + public: + virtual ~IndexBoundsFault() throw() {} + explicit IndexBoundsFault( const std::string& msg="Array index is outsides the bounds of an array." ) : + LogicError( msg ) {} + }; + class HardwareDeficiency : public RuntimeError { public: diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index 739320c833..e69338a09b 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -120,8 +120,8 @@ void WriteFIFO(u32 mem, const u64 *value) { if( mtgsThread != NULL ) { - const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 16 ); - jASSUME( count == 16 ); + const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 ); + jASSUME( count == 1 ); u64* data = (u64*)mtgsThread->GetDataPacketPtr(); data[0] = value[0]; data[1] = value[1]; diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 8fef4146ea..c4673385e0 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -552,7 +552,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc) if( mtgsThread != NULL ) { int sizetoread = (qwc)<<4; - sizetoread = mtgsThread->PrepDataPacket( GIF_PATH_3, pMem, sizetoread ); + sizetoread = mtgsThread->PrepDataPacket( GIF_PATH_3, pMem, qwc ); u8* pgsmem = mtgsThread->GetDataPacketPtr(); /* check if page of endmem is valid (dark cloud2) */ @@ -579,7 +579,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc) } else #endif - memcpy_aligned(pgsmem, pMem, sizetoread); + memcpy_aligned(pgsmem, pMem, sizetoread<<4); mtgsThread->SendDataPacket(); } diff --git a/pcsx2/GS.h b/pcsx2/GS.h index d60afa4e06..d1c2e29149 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -112,7 +112,9 @@ struct GIFPath ///////////////////////////////////////////////////////////////////////////// // MTGS Threaded Class Declaration -#define MTGS_RINGBUFFERSIZE 0x00300000 // 3Mb +// Uncomment this to enable the MTGS debug stack, which tracks to ensure reads +// and writes stay synchronized. Warning: the debug stack is VERY slow. 
+//#define RINGBUF_DEBUG_STACK enum GIF_PATH { @@ -143,18 +145,29 @@ enum GS_RINGTYPE , GS_RINGTYPE_STARTTIME // special case for min==max fps frameskip settings }; - class mtgsThreadObject : public Threading::Thread { friend class SaveState; protected: - // note: when g_pGSRingPos == g_pGSWritePos, the fifo is empty - const u8* m_RingPos; // cur pos gs is reading from - u8* m_WritePos; // cur pos ee thread is writing to - const u8* const m_RingBufferEnd; // pointer to the end of the ringbuffer (used to detect buffer wraps) + // Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s. + // (actual size is 1< m_RingBuffer; + // mtgs needs its own memory space separate from the PS2. The PS2 memory is in // synch with the EE while this stays in sync with the GS (ie, it lags behind) - PCSX2_ALIGNED16( u8 m_gsMem[0x2000] ); - - PCSX2_ALIGNED( 4096, u8 m_RingBuffer[MTGS_RINGBUFFERSIZE] ); + u8* const m_gsMem; public: mtgsThreadObject(); @@ -225,8 +241,8 @@ protected: u32 _gifTransferDummy( GIF_PATH pathidx, const u8 *pMem, u32 size ); // Used internally by SendSimplePacket type functions - const u8* _PrepForSimplePacket(); - void _FinishSimplePacket( const u8* future_writepos ); + uint _PrepForSimplePacket(); + void _FinishSimplePacket( uint future_writepos ); int Callback(); }; diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index a367df92d6..690bdd3308 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -49,8 +49,8 @@ using namespace std; // This allows us to delacre the vars as non-volatile and only use // them as volatile when appropriate (more optimized). 
-#define volatize(x) (*(u8* volatile*)&(x)) // for writepos -#define volatize_c(x) (*(u8 * volatile*)&(x)) // for readpos +#define volatize(x) (*reinterpret_cast(&(x))) // for writepos +//#define volatize_c(x) (*(volatile u32*)&(x)) // for readpos ///////////////////////////////////////////////////////////////////////////// // BEGIN -- MTGS GIFtag Parse Implementation @@ -164,27 +164,23 @@ static void RegHandlerLABEL(const u32* data) GSSIGLBLID->LBLID = (GSSIGLBLID->LBLID&~data[1])|(data[0]&data[1]); } -// END -- MTGS GIFtag Parse Implementation +// END -- MTGS GIFtag Parse Implementation ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -// MTGS Threaded Class Implementation +// MTGS Threaded Class Implementation mtgsThreadObject* mtgsThread = NULL; -// Uncomment this to enable the MTGS debug stack, which tracks to ensure reads -// and writes stay synchronized. Warning: the debug stack is VERY slow. -//#define RINGBUF_DEBUG_STACK #ifdef RINGBUF_DEBUG_STACK #include -std::list ringposStack; -mutex_t stackLock; +std::list ringposStack; #endif #ifdef _DEBUG // debug variable used to check for bad code bits where copies are started // but never closed, or closed without having been started. 
(GSRingBufCopy calls -// should always be followed by acall to GSRINGBUF_DONECOPY) +// should always be followed by a call to GSRINGBUF_DONECOPY) static int copyLock = 0; #endif @@ -192,27 +188,29 @@ typedef void (*GIFRegHandler)(const u32* data); static GIFRegHandler s_GSHandlers[3] = { RegHandlerSIGNAL, RegHandlerFINISH, RegHandlerLABEL }; mtgsThreadObject::mtgsThreadObject() : - m_RingPos( m_RingBuffer ) -, m_WritePos( m_RingBuffer ) -, m_RingBufferEnd( m_RingBuffer + sizeof( m_RingBuffer ) ) + m_RingPos( 0 ) +, m_WritePos( 0 ) -, m_wait_InitDone() +, m_post_InitDone() , m_lock_RingRestart() , m_CopyCommandTally( 0 ) , m_CopyDataTally( 0 ) , m_RingBufferIsBusy( 0 ) -, m_packet_size() -, m_packet_data( NULL ) +, m_packet_size( 0 ) +, m_packet_ringpos( 0 ) #ifdef RINGBUF_DEBUG_STACK , m_lock_Stack() #endif +, m_RingBuffer( m_RingBufferSize + (Ps2MemSize::GSregs/sizeof(u128)) ) +, m_gsMem( (u8*)m_RingBuffer.GetPtr( m_RingBufferSize ) ) { // Wait for the thread to finish initialization (it runs GSinit, which can take // some time since it's creating a new window and all), and then check for errors. - m_wait_InitDone.Wait(); + m_post_event.Post(); // tell MTGS we're done here + m_post_InitDone.Wait(); // and wait for MTGS to be done there! if( m_returncode != 0 ) // means the thread failed to init the GS plugin throw Exception::PluginFailure( "GS", "The GS plugin failed to open/initialize." ); @@ -233,7 +231,7 @@ void mtgsThreadObject::Reset() // * Signal a reset. // * clear the path and byRegs structs (used by GIFtagDummy) - AtomicExchangePointer( m_RingPos, m_WritePos ); + AtomicExchange( m_RingPos, m_WritePos ); MTGS_LOG( "MTGS > Sending Reset...\n" ); SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 ); @@ -406,14 +404,15 @@ __forceinline u32 mtgsThreadObject::_gifTransferDummy( GIF_PATH pathidx, const u } } - // FIXME: dq8, pcsx2 error probably - if(pathidx == 0) { if(!path.tag.eop && path.tag.nloop > 0) { path.tag.nloop = 0; DevCon::Write( "path1 hack! 
" ); + + // This means that the giftag data got screwy somewhere + // along the way (often means curreg was in a bad state or something) } } #ifdef PCSX2_GSRING_SAMPLING_STATS @@ -426,119 +425,142 @@ __forceinline u32 mtgsThreadObject::_gifTransferDummy( GIF_PATH pathidx, const u return size; } +struct PacketTagType +{ + u32 command; + u32 data[3]; +}; + int mtgsThreadObject::Callback() { Console::WriteLn("MTGS > Thread Started, Opening GS Plugin..."); + // Wait for the MTGS to initialize structures. + m_post_event.Wait(); + - memcpy_aligned( m_gsMem, PS2MEM_GS, sizeof(m_gsMem) ); + memcpy_aligned( m_gsMem, PS2MEM_GS, Ps2MemSize::GSregs ); // m_gsMem is a pointer now, so sizeof(m_gsMem) would only be pointer-sized GSsetBaseMem( m_gsMem ); m_returncode = GSopen((void *)&pDsp, "PCSX2", 1); GSCSRr = 0x551B400F; // 0x55190000 - m_wait_InitDone.Set(); + m_post_InitDone.Post(); if (m_returncode != 0) { return m_returncode; } // error msg will be issued to the user by Plugins.c Console::WriteLn("MTGS > GSopen Finished."); #ifdef RINGBUF_DEBUG_STACK - u32 prevCmd=0; + PacketTagType prevCmd = { 0 }; // zero-init: prevCmd.command is logged before the first assignment #endif while( !m_sigterm ) { - m_wait_event.Wait(); + m_post_event.Wait(); + //if( m_sigterm ) break; + AtomicExchange( m_RingBufferIsBusy, 1 ); // note: m_RingPos is intentionally not volatile, because it should only // ever be modified by this thread. while( m_RingPos != volatize(m_WritePos)) { - assert( m_RingPos < m_RingBufferEnd ); + assert( m_RingPos < m_RingBufferSize ); - u32 tag = *(u32*)m_RingPos; - u32 ringposinc = 16; + const PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_RingPos]; + u32 ringposinc = 1; #ifdef RINGBUF_DEBUG_STACK // pop a ringpos off the stack. It should match this one! 
- EnterCriticalSection( &stackLock ); + m_lock_Stack.Lock(); uptr stackpos = ringposStack.back(); - if( stackpos != (uptr)m_RingPos ) + if( stackpos != m_RingPos ) { - Console::Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, (long)m_RingPos, prevCmd ); + Console::Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", params stackpos, m_RingPos, prevCmd.command ); } - assert( stackpos == (long)m_RingPos ); + assert( stackpos == m_RingPos ); prevCmd = tag; ringposStack.pop_back(); - LeaveCriticalSection( &stackLock ); + m_lock_Stack.Unlock(); #endif - switch( tag&0xffff ) + switch( tag.command ) { case GS_RINGTYPE_RESTART: - AtomicExchangePointer(m_RingPos, m_RingBuffer); + AtomicExchange(m_RingPos, 0); // stall for a bit to let the MainThread have time to update the g_pGSWritePos. m_lock_RingRestart.Lock(); m_lock_RingRestart.Unlock(); - continue; + continue; case GS_RINGTYPE_P1: { - int qsize = (tag>>16); + const int qsize = tag.data[0]; + const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 ); + // make sure that tag>>16 is the MAX size readable - GSgifTransfer1((u32*)(m_RingPos+16) - 0x1000 + 4*qsize, 0x4000-qsize*16); - ringposinc += qsize<<4; - break; + //GSgifTransfer1(((u32*)data) - 0x1000 + 4*qsize, 0x4000-qsize*16); + GSgifTransfer1((u32*)(data - 0x400 + qsize), 0x4000-qsize*16); + ringposinc += qsize; } + break; + case GS_RINGTYPE_P2: - GSgifTransfer2((u32*)(m_RingPos+16), tag>>16); - ringposinc += (tag>>16)<<4; - break; + { + const int qsize = tag.data[0]; + const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 ); + GSgifTransfer2((u32*)data, qsize); + ringposinc += qsize; + } + break; + case GS_RINGTYPE_P3: - GSgifTransfer3((u32*)(m_RingPos+16), tag>>16); - ringposinc += (tag>>16)<<4; - break; + { + const int qsize = tag.data[0]; + const u128* data = m_RingBuffer.GetPtr( m_RingPos+1 ); + GSgifTransfer3((u32*)data, qsize); + ringposinc += qsize; + } + break; + case GS_RINGTYPE_VSYNC: { - 
GSvsync(*(u32*)(m_RingPos+4)); + GSvsync(tag.data[0]); - gsFrameSkip( !( *(u32*)(m_RingPos+8) ) ); + gsFrameSkip( !tag.data[1] ); if( PAD1update != NULL ) PAD1update(0); if( PAD2update != NULL ) PAD2update(1); - - break; } + break; case GS_RINGTYPE_FRAMESKIP: _gs_ResetFrameskip(); - break; + break; case GS_RINGTYPE_MEMWRITE8: - m_gsMem[*(u32*)(m_RingPos+4)] = *(u8*)(m_RingPos+8); - break; + m_gsMem[tag.data[0]] = (u8)tag.data[1]; + break; case GS_RINGTYPE_MEMWRITE16: - *(u16*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u16*)(m_RingPos+8); - break; + *(u16*)(m_gsMem+tag.data[0]) = (u16)tag.data[1]; + break; case GS_RINGTYPE_MEMWRITE32: - *(u32*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u32*)(m_RingPos+8); - break; + *(u32*)(m_gsMem+tag.data[0]) = tag.data[1]; + break; case GS_RINGTYPE_MEMWRITE64: - *(u64*)(m_gsMem+*(u32*)(m_RingPos+4)) = *(u64*)(m_RingPos+8); - break; + *(u64*)(m_gsMem+tag.data[0]) = *(u64*)&tag.data[1]; + break; case GS_RINGTYPE_FREEZE: { - //SaveState* f = (SaveState*)(*(uptr*)(m_RingPos+8)); - freezeData* data = (freezeData*)(*(uptr*)(m_RingPos+8)); - int mode = *(s32*)(m_RingPos+4); + freezeData* data = (freezeData*)(*(uptr*)&tag.data[1]); + int mode = tag.data[0]; GSfreeze( mode, data ); break; } case GS_RINGTYPE_RECORD: { - int record = *(u32*)(m_RingPos+4); + int record = tag.data[0]; if( GSsetupRecording != NULL ) GSsetupRecording(record, NULL); if( SPU2setupRecording != NULL ) SPU2setupRecording(record, NULL); break; @@ -551,27 +573,27 @@ int mtgsThreadObject::Callback() case GS_RINGTYPE_SOFTRESET: { - int mask = *(u32*)(m_RingPos+4); + int mask = tag.data[0]; MTGS_LOG( "MTGS > Receiving GIF Soft Reset (mask: %d)\n", mask ); GSgifSoftReset( mask ); break; } case GS_RINGTYPE_WRITECSR: - GSwriteCSR( *(u32*)(m_RingPos+4) ); + GSwriteCSR( tag.data[0] ); break; case GS_RINGTYPE_MODECHANGE: - _gs_ChangeTimings( *(u32*)(m_RingPos+4), *(u32*)(m_RingPos+8) ); + _gs_ChangeTimings( tag.data[0], tag.data[1] ); break; case GS_RINGTYPE_STARTTIME: - m_iSlowStart += 
*(u32*)(m_RingPos+4); + m_iSlowStart += tag.data[0]; break; #ifdef PCSX2_DEVBUILD default: - Console::Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", params tag, m_RingPos, m_WritePos); + Console::Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", params tag.command, m_RingPos, m_WritePos); assert(0); m_RingPos = m_WritePos; continue; @@ -581,12 +603,10 @@ int mtgsThreadObject::Callback() #endif } - const u8* newringpos = m_RingPos + ringposinc; - assert( newringpos <= m_RingBufferEnd ); - if( newringpos == m_RingBufferEnd ) - newringpos = m_RingBuffer; - - AtomicExchangePointer( m_RingPos, newringpos ); + uint newringpos = m_RingPos + ringposinc; + assert( newringpos <= m_RingBufferSize ); + newringpos &= m_RingBufferMask; + AtomicExchange( m_RingPos, newringpos ); } AtomicExchange( m_RingBufferIsBusy, 0 ); } @@ -616,7 +636,7 @@ void mtgsThreadObject::WaitGS() // For use in loops that wait on the GS thread to do certain things. void mtgsThreadObject::SetEvent() { - m_wait_event.Set(); + m_post_event.Post(); m_CopyCommandTally = 0; m_CopyDataTally = 0; } @@ -635,30 +655,28 @@ void mtgsThreadObject::SetEventWait() u8* mtgsThreadObject::GetDataPacketPtr() const { - return m_packet_data; + return (u8*)m_RingBuffer.GetPtr( m_packet_ringpos ); } // Closes the data packet send command, and initiates the gs thread (if needed). void mtgsThreadObject::SendDataPacket() { // make sure a previous copy block has been started somewhere. 
- jASSUME( m_packet_data != NULL ); + jASSUME( m_packet_size != 0 ); - const u8* temp = m_packet_data + m_packet_size; - - jASSUME( temp <= m_RingBufferEnd ); - if( temp == m_RingBufferEnd ) - temp = m_RingBuffer; + uint temp = m_packet_ringpos + m_packet_size; + jASSUME( temp <= m_RingBufferSize ); + temp &= m_RingBufferMask; #ifdef _DEBUG - else + if( m_packet_ringpos + m_packet_size < m_RingBufferSize ) { - const u8* readpos = volatize(m_RingPos); + uint readpos = volatize(m_RingPos); if( readpos != m_WritePos ) { // The writepos should never leapfrog the readpos // since that indicates a bad write. - if( m_packet_data < readpos ) + if( m_packet_ringpos < readpos ) assert( temp < readpos ); } @@ -669,9 +687,9 @@ void mtgsThreadObject::SendDataPacket() } #endif - AtomicExchangePointer( m_WritePos, temp ); + AtomicExchange( m_WritePos, temp ); - m_packet_data = NULL; + const uint sent_size = m_packet_size; m_packet_size = 0; // keep the size for the tally below; zero marks the packet as closed if( m_RingBufferIsBusy ) return; @@ -689,7 +707,7 @@ void mtgsThreadObject::SendDataPacket() // 8 - roughly 2% slower on HT machines. - m_CopyDataTally += m_packet_size; + m_CopyDataTally += sent_size; - if( ( m_CopyDataTally > 0x40000 ) || ( ++m_CopyCommandTally > 16 ) ) + if( ( m_CopyDataTally > 0x4000 ) || ( ++m_CopyCommandTally > 16 ) ) { FreezeXMMRegs(1); FreezeMMXRegs(1); @@ -727,6 +745,8 @@ static u32 GSRingBufCopySz = 0; // returns the amount of giftag data not processed (in simd128 values). // Return value is used by VU1 XGKICK to hack-fix data packets which are too // large for VU1 memory. +// Parameters: +// size - size of the packet data, in simd128's int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size ) { #ifdef PCSX2_GSRING_TX_STATS @@ -777,34 +797,31 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s // interlocked exchanges when we modify it, however, since the GS thread // is reading it. 
- const u8 *writepos = m_WritePos; + uint writepos = m_WritePos; // Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY - jASSUME( m_packet_data == NULL ); + jASSUME( m_packet_size == 0 ); // Sanity checks! (within the confines of our ringbuffer please!) - jASSUME( size < MTGS_RINGBUFFERSIZE ); - jASSUME( writepos < m_RingBufferEnd ); - - // Alignment checks! (16 bytes please!) - jASSUME( ((uptr)writepos & 15) == 0 ); - //jASSUME( (size&15) == 0); + jASSUME( size < m_RingBufferSize ); + jASSUME( writepos < m_RingBufferSize ); //fixme: Vif sometimes screws up and size is unaligned, try this then (rama) - if( (size&15) != 0){ + // Is this still a problem? It should be fixed on the specific VIF command now. (air) + /*if( (size&15) != 0){ Console::Error( "MTGS problem, size unaligned"); size = (size+15)&(~15); - } + }*/ // retval has the amount of data *not* processed, so we only need to reserve // enough room for size - retval: - int retval = _gifTransferDummy( pathidx, srcdata, size>>4 ); + int retval = _gifTransferDummy( pathidx, srcdata, size ); - size = size - (retval<<4); + size = size - retval; m_packet_size = size; - size += 16; // takes into account our command qword. + size++; // takes into account our command qword. - if( writepos + size < m_RingBufferEnd ) + if( writepos + size < m_RingBufferSize ) { // generic gs wait/stall. // Waits until the readpos is outside the scope of the write area. 
@@ -812,7 +829,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s { // two conditionals in the following while() loop, so precache // the readpos for more efficient behavior: - const u8* readpos = volatize_c(m_RingPos); + uint readpos = volatize(m_RingPos); // if the writepos is past the readpos then we're safe: if( writepos >= readpos ) break; @@ -824,7 +841,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s SetEventWait(); } } - else if( writepos + size > m_RingBufferEnd ) + else if( writepos + size > m_RingBufferSize ) { // If the incoming packet doesn't fit, then start over from // the start of the ring buffer (it's a lot easier than trying @@ -836,7 +853,7 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s while( true ) { - const u8* readpos = volatize(m_RingPos); + uint readpos = volatize(m_RingPos); // is the buffer empty? if( readpos == writepos ) break; @@ -844,22 +861,22 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s // Also: Wait for the readpos to go past the start of the buffer // Otherwise it'll stop dead in its tracks when we set the new write // position below (bad!) - if( readpos < writepos && readpos != m_RingBuffer ) break; + if( readpos < writepos && readpos != 0 ) break; SetEventWait(); } m_lock_RingRestart.Lock(); SendSimplePacket( GS_RINGTYPE_RESTART, 0, 0, 0 ); - writepos = m_RingBuffer; - AtomicExchangePointer( m_WritePos, writepos ); + writepos = 0; + AtomicExchange( m_WritePos, writepos ); m_lock_RingRestart.Unlock(); // stall until the read position is past the end of our incoming block, // or until it reaches the current write position (signals an empty buffer). 
while( true ) { - const u8* readpos = volatize(m_RingPos); + uint readpos = volatize(m_RingPos); if( readpos == m_WritePos ) break; if( writepos+size < readpos ) break; @@ -874,48 +891,48 @@ int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 s //SysPrintf( "MTGS > Perfect Fit!\n"); while( true ) { - const u8* readpos = volatize(m_RingPos); + uint readpos = volatize(m_RingPos); // is the buffer empty? Don't wait... if( readpos == writepos ) break; // Copy is ready so long as readpos is less than writepos and *not* // equal to the base of the ringbuffer (otherwise the buffer will stop) - if( readpos < writepos && readpos != m_RingBuffer ) break; + if( readpos < writepos && readpos != 0 ) break; SetEventWait(); } } #ifdef RINGBUF_DEBUG_STACK - mutex_lock( stackLock ); - ringposStack.push_front( (uptr)writepos ); - mutex_unlock( stackLock ); + m_lock_Stack.Lock(); + ringposStack.push_front( writepos ); + m_lock_Stack.Unlock(); #endif // Command qword: Low word is the command, and the high word is the packet // length in SIMDs (128 bits). - const uint simd_size = (m_packet_size>>4); // minus the command byte! 
- *(u32*)m_WritePos = (pathidx+1) | (simd_size<<16); - m_packet_data = m_WritePos + 16; + PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos]; + tag.command = pathidx+1; + tag.data[0] = m_packet_size; + m_packet_ringpos = m_WritePos + 1; return m_packet_size; } -__forceinline const u8* mtgsThreadObject::_PrepForSimplePacket() +__forceinline uint mtgsThreadObject::_PrepForSimplePacket() { #ifdef RINGBUF_DEBUG_STACK m_lock_Stack.Lock(); - ringposStack.push_front( (uptr)m_WritePos ); + ringposStack.push_front( m_WritePos ); m_lock_Stack.Unlock(); #endif - const u8* future_writepos = m_WritePos+16; - jASSUME( future_writepos <= m_RingBufferEnd ); + uint future_writepos = m_WritePos+1; + jASSUME( future_writepos <= m_RingBufferSize ); - if( future_writepos >= m_RingBufferEnd ) - future_writepos = m_RingBuffer; + future_writepos &= m_RingBufferMask; while( future_writepos == volatize(m_RingPos) ) SetEventWait(); @@ -923,31 +940,33 @@ __forceinline const u8* mtgsThreadObject::_PrepForSimplePacket() return future_writepos; } -__forceinline void mtgsThreadObject::_FinishSimplePacket( const u8* future_writepos ) +__forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos ) { assert( future_writepos != volatize(m_RingPos) ); - AtomicExchangePointer( m_WritePos, future_writepos ); + AtomicExchange( m_WritePos, future_writepos ); } void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 ) { - const u8* const thefuture = _PrepForSimplePacket(); + const uint thefuture = _PrepForSimplePacket(); + PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos]; - *(u32*)m_WritePos = type; - *(u32*)(m_WritePos+4) = data0; - *(u32*)(m_WritePos+8) = data1; - *(u32*)(m_WritePos+12) = data2; + tag.command = type; + tag.data[0] = data0; + tag.data[1] = data1; + tag.data[2] = data2; _FinishSimplePacket( thefuture ); } void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 ) { - const u8* const 
thefuture = _PrepForSimplePacket(); + const uint thefuture = _PrepForSimplePacket(); + PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos]; - *(u32*)m_WritePos = type; - *(u32*)(m_WritePos+4) = data0; - *(uptr*)(m_WritePos+8) = (uptr)data1; + tag.command = type; + tag.data[0] = data0; + *(uptr*)&tag.data[1] = (uptr)data1; _FinishSimplePacket( thefuture ); } @@ -1004,4 +1023,4 @@ void mtgsThreadObject::Freeze( SaveState& state ) void mtgsRingBufSimplePacket( s32 command, u32 data0, u32 data1, u32 data2 ) { mtgsThread->SendSimplePacket( (GS_RINGTYPE)command, data0, data1, data2 ); -} \ No newline at end of file +} diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 1213232f4b..d4cd06dfe7 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -174,6 +174,9 @@ void memMapVUmicro() { vtlb_MapHandler(vu0_micro_mem[CHECK_VU0REC ? 0 : 1],0x11000000,0x00004000); vtlb_MapHandler(vu1_micro_mem[CHECK_VU1REC ? 0 : 1],0x11008000,0x00004000); + + vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000); + vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000); } void memMapPhy() @@ -193,9 +196,6 @@ void memMapPhy() //IOP mem vtlb_MapBlock(psxM,0x1c000000,0x00800000); - vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000); - vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000); - //These fallback to mem* stuff ... vtlb_MapHandler(tlb_fallback_1,0x10000000,0x10000); vtlb_MapHandler(tlb_fallback_6,0x12000000,0x10000); @@ -455,11 +455,22 @@ void __fastcall _ext_memWrite128(u32 mem, const u64 *value) typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc ); template -static __forceinline ClearFunc_t& GetClearFunc() +static __forceinline void ClearVuFunc( u32 addr, u32 size ) { - return dynarec ? - (( vunum==0 ) ? VU0micro::recClear : VU1micro::recClear) - : (( vunum==0 ) ? 
VU0micro::intClear : VU1micro::intClear); + if( dynarec ) + { + if( vunum==0 ) + VU0micro::recClear(addr,size); + else + VU1micro::recClear(addr,size); + } + else + { + if( vunum==0 ) + VU0micro::intClear(addr,size); + else + VU1micro::intClear(addr,size); + } } template @@ -521,7 +532,7 @@ void __fastcall vuMicroWrite8(u32 addr,mem8_t data) { vu.Micro[addr]=data; - GetClearFunc()(addr&(~7),1); + ClearVuFunc(addr&(~7),1); } } @@ -535,7 +546,7 @@ void __fastcall vuMicroWrite16(u32 addr,mem16_t data) { *(u16*)&vu.Micro[addr]=data; - GetClearFunc()(addr&(~7),1); + ClearVuFunc(addr&(~7),1); } } @@ -549,7 +560,7 @@ void __fastcall vuMicroWrite32(u32 addr,mem32_t data) { *(u32*)&vu.Micro[addr]=data; - GetClearFunc()(addr&(~7),1); + ClearVuFunc(addr&(~7),1); } } @@ -563,7 +574,7 @@ void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data) { *(u64*)&vu.Micro[addr]=data[0]; - GetClearFunc()(addr,1); + ClearVuFunc(addr,1); } } @@ -578,7 +589,7 @@ void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data) *(u64*)&vu.Micro[addr]=data[0]; *(u64*)&vu.Micro[addr+8]=data[1]; - GetClearFunc()(addr,2); + ClearVuFunc(addr,2); } } @@ -696,7 +707,6 @@ void memReset() vtlb_Init(); tlb_fallback_0=vtlb_RegisterHandlerTempl1(_ext_mem,0); - //tlb_fallback_1=vtlb_RegisterHandlerTempl1(_ext_mem,1); tlb_fallback_2=vtlb_RegisterHandlerTempl1(_ext_mem,2); tlb_fallback_3=vtlb_RegisterHandlerTempl1(_ext_mem,3); tlb_fallback_4=vtlb_RegisterHandlerTempl1(_ext_mem,4); diff --git a/pcsx2/Memory.h b/pcsx2/Memory.h index 07d14cba17..1064e247b3 100644 --- a/pcsx2/Memory.h +++ b/pcsx2/Memory.h @@ -33,14 +33,16 @@ namespace Ps2MemSize { static const uint Base = 0x02000000; // 32 MB main memory! static const uint Rom = 0x00400000; // 4 MB main rom - static const uint Rom1 = 0x00040000; // fixme - TLB allocates 0x00080000 ? 
- static const uint Rom2 = 0x00080000; - static const uint ERom = 0x001C0000; + static const uint Rom1 = 0x00040000; // DVD player + static const uint Rom2 = 0x00080000; // Chinese rom extension (?) + static const uint ERom = 0x001C0000; // DVD player extensions (?) static const uint Hardware = 0x00010000; - static const uint Scratch = 0x00004000; // fixme - VM allocates 0x10000 ? + static const uint Scratch = 0x00004000; - static const uint IopRam = 0x200000; // 2MB main ram on the IOP. + static const uint IopRam = 0x00200000; // 2MB main ram on the IOP. static const uint IopHardware = 0x00010000; + + static const uint GSregs = 0x00002000; // 8k for the GS registers and stuff. } #ifdef PCSX2_VIRTUAL_MEM diff --git a/pcsx2/Plugins.cpp b/pcsx2/Plugins.cpp index a96a6cc140..b31345c5a2 100644 --- a/pcsx2/Plugins.cpp +++ b/pcsx2/Plugins.cpp @@ -622,14 +622,28 @@ void ShutdownPlugins() OpenStatus.GS = false; } - GSshutdown(); - PAD1shutdown(); - PAD2shutdown(); - SPU2shutdown(); - CDVDshutdown(); - DEV9shutdown(); - USBshutdown(); - FWshutdown(); + if( GSshutdown != NULL ) + GSshutdown(); + + if( PAD1shutdown != NULL ) + PAD1shutdown(); + if( PAD2shutdown != NULL ) + PAD2shutdown(); + + if( SPU2shutdown != NULL ) + SPU2shutdown(); + + if( CDVDshutdown != NULL ) + CDVDshutdown(); + + if( DEV9shutdown != NULL ) + DEV9shutdown(); + + if( USBshutdown != NULL ) + USBshutdown(); + + if( FWshutdown != NULL ) + FWshutdown(); } int LoadPlugins() { @@ -867,4 +881,4 @@ void PluginsResetGS() int ret = GSinit(); if (ret != 0) { Msgbox::Alert("GSinit error: %d", params ret); } -} \ No newline at end of file +} diff --git a/pcsx2/System.h b/pcsx2/System.h index 220c7ac1f9..538b34dd92 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -283,6 +283,25 @@ protected: int m_size; // size of the allocation of memory const static std::string m_str_Unnamed; +protected: + // Internal contructor for use by derrived classes. 
This allows a derived class to + // use its own memory allocation (with an aligned memory, for example). + // Throws: + // Exception::OutOfMemory if the allocated_mem pointer is NULL. + explicit MemoryAlloc( const std::string& name, T* allocated_mem, int initSize ) : + Name( name ) + , ChunkSize( DefaultChunkSize ) + , m_ptr( allocated_mem ) + , m_size( initSize ) + { + if( m_ptr == NULL ) + throw Exception::OutOfMemory(); + } + + virtual T* _virtual_realloc( int newsize ) + { + return (T*)realloc( m_ptr, newsize * sizeof(T) ); + } public: virtual ~MemoryAlloc() @@ -322,7 +341,7 @@ public: if( blockSize > m_size ) { const uint newalloc = blockSize + ChunkSize; - m_ptr = (T*)realloc( m_ptr, newalloc * sizeof(T) ); + m_ptr = _virtual_realloc( newalloc ); if( m_ptr == NULL ) { throw Exception::OutOfMemory( @@ -353,19 +372,69 @@ public: } protected: + // A safe array index fetcher. Throws an exception if the array index + // is outside the bounds of the array (checked in PCSX2_DEVBUILD builds only). + // Performance Considerations: This function adds quite a bit of overhead + // to array indexing and thus should be done infrequently if used in + // time-critical situations. Instead of using it from inside loops, cache + // the pointer into a local variable and use standard (unsafe) C indexes. T* _getPtr( uint i ) const { +#ifdef PCSX2_DEVBUILD if( i >= (uint)m_size ) { - throw std::out_of_range( + throw Exception::IndexBoundsFault( "Index out of bounds on MemoryAlloc: " + Name + " (index=" + to_string(i) + ", size=" + to_string(m_size) + ")" ); } +#endif return &m_ptr[i]; } }; +template< typename T, uint Alignment > +class SafeAlignedArray : public MemoryAlloc +{ +protected: + T* _virtual_realloc( int newsize ) + { + // TODO : aligned_realloc will need a linux implementation now. -_- + return (T*)_aligned_realloc( m_ptr, newsize * sizeof(T), Alignment ); + } + + // Appends "(align:xx)" to the name of the allocation in devel builds. + // Maybe useful, maybe not... 
no harm in attaching it. 
:D + string _getName( const string& src ) + { +#ifdef PCSX2_DEVBUILD + return src + "(align:" + to_string(Alignment) + ")"; +#endif + return src; + } + +public: + virtual ~SafeAlignedArray() + { + safe_aligned_free( m_ptr ); + // mptr is set to null, so the parent class's destructor won't re-free it. + } + + explicit SafeAlignedArray( const std::string& name="Unnamed" ) : + MemoryAlloc( name ) + { + } + + explicit SafeAlignedArray( int initialSize, const std::string& name="Unnamed" ) : + MemoryAlloc( + _getName(name), + (T*)_aligned_malloc( initialSize * sizeof(T), Alignment ), + initialSize + ) + { + } +}; + #endif /* __SYSTEM_H__ */ diff --git a/pcsx2/ThreadTools.cpp b/pcsx2/ThreadTools.cpp index c3fcb177ce..a82bf8db9e 100644 --- a/pcsx2/ThreadTools.cpp +++ b/pcsx2/ThreadTools.cpp @@ -28,7 +28,7 @@ namespace Threading , m_returncode( 0 ) , m_terminated( false ) , m_sigterm( 0 ) - , m_wait_event() + , m_post_event() { if( pthread_create( &m_thread, NULL, _internal_callback, this ) != 0 ) throw Exception::ThreadCreationError(); @@ -42,7 +42,7 @@ namespace Threading void Thread::Close() { AtomicExchange( m_sigterm, 1 ); - m_wait_event.Set(); + m_post_event.Post(); pthread_join( m_thread, NULL ); } @@ -82,6 +82,38 @@ namespace Threading pthread_mutex_unlock( &mutex ); } + Semaphore::Semaphore() + { + sem_init( &sema, false, 0 ); + } + + Semaphore::~Semaphore() + { + sem_destroy( &sema ); + } + + void Semaphore::Post() + { + sem_post( &sema ); + } + + void Semaphore::Post( int multiple ) + { + sem_post_multiple( &sema, multiple ); + } + + void Semaphore::Wait() + { + sem_wait( &sema ); + } + + int Semaphore::Count() + { + int retval; + sem_getvalue( &sema, &retval ); + return retval; + } + MutexLock::MutexLock() { int err = 0; diff --git a/pcsx2/Threading.h b/pcsx2/Threading.h index 94aeb7630c..11cb15f321 100644 --- a/pcsx2/Threading.h +++ b/pcsx2/Threading.h @@ -20,6 +20,7 @@ #define _THREADING_H_ #include // EBUSY +#include #include "PS2Etypes.h" #include 
"Exceptions.h" @@ -41,6 +42,19 @@ namespace Threading void Wait(); }; + struct Semaphore + { + sem_t sema; + + Semaphore(); + ~Semaphore(); + + void Post(); + void Post( int multiple ); + void Wait(); + int Count(); + }; + struct MutexLock { pthread_mutex_t mutex; @@ -70,7 +84,7 @@ namespace Threading int m_returncode; // value returned from the thread on close. bool m_terminated; // set true after the thread has been closed. u32 m_sigterm; // set to true(1) when the thread has been requested to exit. - WaitEvent m_wait_event; // general wait event that's needed by most threads. + Semaphore m_post_event; // general wait event that's needed by most threads. public: virtual ~Thread(); diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index c709895ef2..38a54ee172 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -1587,8 +1587,8 @@ static int Vif1TransDirectHL(u32 *data){ { // copy 16 bytes the fast way: const u64* src = (u64*)splittransfer[0]; - const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, src, 16); - jASSUME( count == 16 ); + const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, src, 1); + jASSUME( count == 1 ); u64* dst = (u64*)mtgsThread->GetDataPacketPtr(); dst[0] = src[0]; dst[1] = src[1]; @@ -1633,8 +1633,9 @@ static int Vif1TransDirectHL(u32 *data){ if( mtgsThread != NULL ) { //unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :) - const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, data, ret<<2 ); - memcpy_fast( mtgsThread->GetDataPacketPtr(), data, count ); + // Round ret up, just in case it's not 128bit aligned. 
+ const uint count = mtgsThread->PrepDataPacket( GIF_PATH_2, data, (ret+3)>>2 ); + memcpy_fast( mtgsThread->GetDataPacketPtr(), data, count<<4 ); mtgsThread->SendDataPacket(); } else { diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 5e70b9aaea..da296ff6d0 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -681,7 +681,6 @@ void recBC1T( void ) { SaveBranchState(); recompileNextInstruction(1); SetBranchImm(branchTo); - //j32Ptr[1] = JMP32(0); x86SetJ32(j32Ptr[0]); @@ -691,7 +690,6 @@ void recBC1T( void ) { recompileNextInstruction(1); SetBranchImm(pc); - //x86SetJ32(j32Ptr[1]); } void recBC1FL( void ) { diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index c8a5131a2d..28a5031e20 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -45,7 +45,7 @@ namespace VU0micro { SuperVUReset(0); - // these shouldn't be needed, but shouldn't hurt anythign either. + // these shouldn't be needed, but shouldn't hurt anything either. x86FpuState = FPU_STATE; iCWstate = 0; } diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index ca68eb3bc0..99ed50d218 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -1970,15 +1970,14 @@ void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr) // Chances are this should be a "loops around memory" situation, and the packet // should be continued starting at addr zero (0). 
- size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff))); - //size = 0x4000-(size<<4)-(addr&0x3fff); + size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); jASSUME( size > 0 ); //if( size > 0 ) { u8* pmem = mtgsThread->GetDataPacketPtr(); - memcpy_aligned(pmem, (u8*)pMem+addr, size); + memcpy_aligned(pmem, (u8*)pMem+addr, size<<4); mtgsThread->SendDataPacket(); } } -//------------------------------------------------------------------ \ No newline at end of file +//------------------------------------------------------------------ diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 62250154fc..20786ecfb0 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -443,7 +443,7 @@ void SuperVUReset(int vuindex) } // clear the block and any joining blocks -__forceinline void SuperVUClear(u32 startpc, u32 size, int vuindex) +void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex) { vector::iterator itrange; list::iterator it = s_listVUHeaders[vuindex].begin(); diff --git a/pcsx2/x86/iVUzerorec.h b/pcsx2/x86/iVUzerorec.h index 9af1d42dbe..1f451fd58f 100644 --- a/pcsx2/x86/iVUzerorec.h +++ b/pcsx2/x86/iVUzerorec.h @@ -23,7 +23,7 @@ #include "iVUmicro.h" -extern void SuperVUAlloc(int vuindex); // global VU resources aare automatically allocated if necessary. +extern void SuperVUAlloc(int vuindex); // global VU resources are automatically allocated if necessary. extern void SuperVUDestroy(int vuindex); // if vuindex is -1, destroys everything extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything @@ -37,7 +37,7 @@ extern void svudispfntemp(); #ifdef __LINUX__ } #endif -extern void SuperVUClear(u32 startpc, u32 size, int vuindex); +extern void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex); // read = 0, will write to reg // read = 1, will read from reg