diff --git a/pcsx2/GS.h b/pcsx2/GS.h index 76ac704a7c..962f214226 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -73,34 +73,39 @@ enum GIF_REG GIF_REG_NOP = 0x0f, }; +// GIFTAG +// Members of this structure are in CAPS to help visually denote that they are representative +// of actual hw register states of the GIF, unlike the internal tracking vars in GIFPath, which +// are modified during the GIFtag unpacking process. struct GIFTAG { - u32 nloop : 15; - u32 eop : 1; + u32 NLOOP : 15; + u32 EOP : 1; u32 dummy0 : 16; u32 dummy1 : 14; - u32 pre : 1; - u32 prim : 11; - u32 flg : 2; - u32 nreg : 4; - u32 regs[2]; + u32 PRE : 1; + u32 PRIM : 11; + u32 FLG : 2; + u32 NREG : 4; + u32 REGS[2]; }; struct GIFPath { - GIFTAG tag; + const GIFTAG tag; // The "original" tag -- modification allowed only by SetTag(), so let's make it const. + u8 regs[16]; // positioned after tag to ensure 16-byte alignment (in case we SSE optimize later) + + u32 nloop; // local copy of nloop; counts toward zero and leaves the tag copy unmodified. u32 curreg; - u32 _pad[3]; - u8 regs[16]; GIFPath(); - __forceinline void PrepRegs(bool doPrep); + __forceinline void PrepPackedRegs(); __forceinline void SetTag(const void* mem); __forceinline bool StepReg() { - if ((++curreg & 0xf) == tag.nreg) { + if ((++curreg & 0xf) == tag.NREG) { curreg = 0; - if (--tag.nloop == 0) { + if (--nloop == 0) { return false; } } @@ -246,11 +251,11 @@ public: void PostVsyncEnd( bool updategs ); protected: - // Saves MMX/XMM regs, posts an event to the mtgsThread flag and releases a timeslice. + // Saves MMX/XMM REGS, posts an event to the mtgsThread flag and releases a timeslice. // For use in surrounding loops that wait on the mtgs. void PrepEventWait(); - // Restores MMX/XMM regs. For use in surrounding loops that wait on the mtgs. + // Restores MMX/XMM REGS. For use in surrounding loops that wait on the mtgs. void PostEventWait() const; // Processes a GIFtag & packet, and throws out some gsIRQs as needed.
diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 7d22a84d20..64e5f37c84 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -68,19 +68,22 @@ GIFPath::GIFPath() memzero_obj( *this ); } -// unpack the registers -// registers are stored as a sequence of 4 bit values in the -// upper 64 bits of the GIFTAG. That sucks for us, so we unpack -// them into an 8 bit array. +// unpack the registers - registers are stored as a sequence of 4 bit values in the +// upper 64 bits of the GIFTAG. That sucks for us when handling partialized GIF packets +// coming in from paths 2 and 3, so we unpack them into an 8 bit array here. // -__forceinline void GIFPath::PrepRegs(bool doPrep = 1) +__forceinline void GIFPath::PrepPackedRegs() { - if (!doPrep) return; - int loopEnd = ((tag.nreg-1)&0xf) + 1; - u32 tempreg = tag.regs[0]; + // Only unpack registers if we're starting a new pack. Otherwise the unpacked + // array should have already been initialized by a previous partial transfer. + + if( curreg != 0 ) return; + + int loopEnd = ((tag.NREG-1)&0xf) + 1; + u32 tempreg = tag.REGS[0]; for (int i = 0; i < loopEnd; i++) { - if (i == 8) tempreg = tag.regs[1]; + if (i == 8) tempreg = tag.REGS[1]; regs[i] = tempreg & 0xf; tempreg >>= 4; } @@ -88,9 +91,10 @@ __forceinline void GIFPath::PrepRegs(bool doPrep = 1) __forceinline void GIFPath::SetTag(const void* mem) { - tag = *((GIFTAG*)mem); + const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem); + + nloop = tag.NLOOP; curreg = 0; - PrepRegs(); } static void _mtgsFreezeGIF( SaveStateBase& state, GIFPath (&paths)[3] ) @@ -243,11 +247,12 @@ void mtgsThreadObject::Reset() memzero_obj( s_path ); } -#define incTag(x, y) { \ +#define incTag(x, y) do { \ pMem += (x); \ size -= (y); \ if ((pathidx==GIF_PATH_1)&&(pMem>=vuMemEnd)) pMem -= 0x4000; \ -} +} while(false) + #define aMin(x, y) ((x < y) ? (x) : (y)) #define subVal(x, y) ((x > y) ? 
(x-y) : 0 ) @@ -259,27 +264,29 @@ __forceinline void gsHandler(const u8* pMem) { } } +// Parameters: +// size - max size of incoming data stream, in qwc (simd128) __forceinline int mtgsThreadObject::_gifTransferDummy(GIF_PATH pathidx, const u8* pMem, u32 size) { GIFPath& path = s_path[pathidx]; // Current Path const u8* vuMemEnd = pMem + (size<<4); // End of VU1 Mem - if (pathidx==GIF_PATH_1) size = 0x4000; // VU1 mem size u32 startSize = size; // Start Size while (size > 0) { - if (!path.tag.nloop) { + if (!path.nloop) { path.SetTag(pMem); incTag(16, 1); if (pathidx == GIF_PATH_3) { - if (path.tag.flg&2) Path3progress = IMAGE_MODE; + if (path.tag.FLG&2) Path3progress = IMAGE_MODE; else Path3progress = TRANSFER_MODE; } } else { - switch(path.tag.flg) { + switch(path.tag.FLG) { case GIF_FLG_PACKED: + path.PrepPackedRegs(); do { if (path.GetReg() == 0xe) { gsHandler(pMem); @@ -289,13 +296,13 @@ __forceinline int mtgsThreadObject::_gifTransferDummy(GIF_PATH pathidx, const u8 break; case GIF_FLG_REGLIST: { - u32 numRegs = (((path.tag.nreg-1)&0xf)+2)/2; - if((numRegs * path.tag.nloop) <= size) { + u32 numRegs = (((path.tag.NREG-1)&0xf)+2)/2; + if((numRegs * path.nloop) <= size) { u32 temp1 = (numRegs - path.curreg); - u32 temp2 = (numRegs * subVal(path.tag.nloop, 1)); + u32 temp2 = (numRegs * subVal(path.nloop, 1)); incTag((temp1*16), temp1); incTag((temp2*16), temp2); - path.tag.nloop = 0; + path.nloop = 0; } else { size *= 2; @@ -310,15 +317,15 @@ __forceinline int mtgsThreadObject::_gifTransferDummy(GIF_PATH pathidx, const u8 case GIF_FLG_IMAGE: case GIF_FLG_IMAGE2: { - int len = aMin(size, path.tag.nloop); + int len = aMin(size, path.nloop); incTag((len * 16), len); - path.tag.nloop -= len; + path.nloop -= len; } break; } } - if (path.tag.eop && !path.tag.nloop) { + if (path.tag.EOP && !path.nloop) { if (pathidx != GIF_PATH_2) { break; } @@ -328,7 +335,7 @@ __forceinline int mtgsThreadObject::_gifTransferDummy(GIF_PATH pathidx, const u8 size = (startSize - size); if 
(pathidx == GIF_PATH_3) { - if (path.tag.eop && !path.tag.nloop) { + if (path.tag.EOP && !path.nloop) { Path3progress = STOPPED_MODE; } gif->madr += size * 16; @@ -341,6 +348,8 @@ __forceinline int mtgsThreadObject::_gifTransferDummy(GIF_PATH pathidx, const u8 // Processes a GIFtag & packet, and throws out some gsIRQs as needed. // Used to keep interrupts in sync with the EE, while the GS itself // runs potentially several frames behind. +// Parameters: +// size - max size of incoming data stream, in qwc (simd128) __forceinline int mtgsThreadObject::gifTransferDummy(GIF_PATH pathidx, const u8* pMem, u32 size) { #ifdef PCSX2_GSRING_SAMPLING_STATS diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index 22eb3e8100..6eb322d669 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -94,7 +94,7 @@ public: template<typename T> void Freeze( T& data ) { - FreezeMem( &data, sizeof( T ) ); + FreezeMem( const_cast<void*>((void*)&data), sizeof( T ) ); } // FreezeLegacy can be used to load structures short of their full size, which is diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index c373eddff5..2c062645ab 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1103,21 +1103,23 @@ mVUop(mVU_XITOP) { //------------------------------------------------------------------ void __fastcall mVU_XGKICK_(u32 addr) { - addr = (addr<<4) & 0x3fff; // Multiply addr by 16 to get real address - u8* data = (u8*)(microVU1.regs->Mem + addr); - u32 diff = 0x4000 - addr; - u32 size = mtgsThread->PrepDataPacket(GIF_PATH_1, data, diff >> 4); + addr &= 0x3ff; // addr, diff and size are in qwc (16-byte units) from here on; VU1 memory is 0x400 qwc, so byte offsets must go through data (Mem + addr*16) + u8* data = microVU1.regs->Mem + (addr*16); + u32 diff = 0x400 - addr; + u32 size = mtgsThread->PrepDataPacket(GIF_PATH_1, data, 0x400); u8* pDest = mtgsThread->GetDataPacketPtr(); - if((size << 4) > diff) { + if(size > diff) { + // fixme: one of these days the following *16's will get cleaned up when we introduce + // a special qwc/simd16 optimized version of memcpy_aligned.
:) //DevCon::Status("XGkick Wrap!"); - memcpy_aligned(pDest, microVU1.regs->Mem + addr, diff); - size -= diff >> 4; - pDest += diff; - memcpy_aligned(pDest, microVU1.regs->Mem, size<<4); + memcpy_aligned(pDest, data, diff*16); + size -= diff; + pDest += diff*16; + memcpy_aligned(pDest, microVU1.regs->Mem, size*16); } else { - memcpy_aligned(pDest, microVU1.regs->Mem + addr, size<<4); + memcpy_aligned(pDest, data, size*16); } mtgsThread->SendDataPacket(); } diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp index 45d5ed2378..ea2c41e1c1 100644 --- a/pcsx2/x86/sVU_Lower.cpp +++ b/pcsx2/x86/sVU_Lower.cpp @@ -1975,12 +1975,7 @@ void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr) u32 size; u8* data = ((u8*)pMem + (addr&0x3fff)); - // fixme: The gifTagDummy function in the MTGS (called by PrepDataPacket) has a - // hack that aborts the packet if it goes past the end of VU1 memory. - // Chances are this should be a "loops around memory" situation, and the packet - // should be continued starting at addr zero (0). - - size = mtgsThread->PrepDataPacket(GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); + size = mtgsThread->PrepDataPacket(GIF_PATH_1, data, 0x4000 / 16 ); jASSUME( size > 0 ); u8* pmem = mtgsThread->GetDataPacketPtr();