mirror of https://github.com/PCSX2/pcsx2.git
newVif: fixed Tekken 5 while keeping games like Gradius 5 working. Hopefully this fixes the rest of the problems newVif was having with games.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2429 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent b1ef60c0f7
commit cbc043156d
@@ -165,7 +165,7 @@ static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modR
}

static bool UsesTwoRegs[] =
{
	true, true, true, true,
	true, true, true, true,
	false, false, false, false,
	false, false, false, false,
	false, false, false, true,
@@ -221,15 +221,22 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
	}

	if (doMode==2) writeBackRow();
	xMOV(ptr32[&v.vif->cl], vCL);
	xMOV(ptr32[&v.vif->cl], vCL);
	xMOV(ptr32[&v.vifRegs->num], vNum);
	xRET();
}

static _f u8* dVifsetVUptr(const nVifStruct& v, int offset) {
	u8* ptr    = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
	u8* endPtr = ptr + _vBlock.num * 16;
	if (endPtr > v.vuMemEnd) {
static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
	u8* endPtr; // Check if we need to wrap around VU memory
	u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));
	if (!isFill) { // Account for skip-cycles
		int skipSize = cl - wl;
		int blocks   = _vBlock.num / wl;
		int skips    = (blocks * skipSize + _vBlock.num) * 16;
		endPtr = ptr + skips;
	}
	else endPtr = ptr + (_vBlock.num * 16);
	if ( endPtr >= v.vuMemEnd ) {
		DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
		ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
	}
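The reworked dVifsetVUptr above no longer assumes a block occupies exactly _vBlock.num * 16 bytes: in skipping mode it has to account for the cycles skipped between write groups before testing for a VU memory overflow. A minimal standalone sketch of that arithmetic (unpackSpanInBytes is a hypothetical name, not PCSX2 code):

#include <cstdio>

// Minimal sketch of the skip-cycle accounting: with cl > wl, every group of
// wl written vectors is followed by (cl - wl) skipped ones, so the unpack
// spans more VU memory than num * 16 bytes.
static int unpackSpanInBytes(int num, int cl, int wl, bool isFill)
{
	if (isFill)                       // fill mode (cl < wl): vectors are written back to back
		return num * 16;
	int skipSize = cl - wl;           // vectors skipped after each write group
	int blocks   = num / wl;          // number of complete write groups
	return (blocks * skipSize + num) * 16;
}

int main()
{
	// num=8, cl=4, wl=1: 8 groups, each skipping 3 vectors -> (8*3 + 8) * 16 = 512 bytes
	std::printf("%d\n", unpackSpanInBytes(8, 4, 1, false));
	// fill mode just spans 8 * 16 = 128 bytes
	std::printf("%d\n", unpackSpanInBytes(8, 1, 4, true));
	return 0;
}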
@@ -266,10 +273,10 @@ _f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {

	// Zero out the mask parameter if it's unused -- games leave random junk
	// values here which cause false recblock cache misses.
	_vBlock.mask = (doMask || ((_vBlock.mode&3)!=0) ) ? v.vifRegs->mask : 0x00;
	_vBlock.mask = (doMask || (_vBlock.mode&3)) ? v.vifRegs->mask : 0;

	if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
		if( u8* dest = dVifsetVUptr(v, v.vif->tag.addr) ) {
		if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
			//DevCon.WriteLn("Running Recompiled Block!");
			((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
		}
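The mask normalization above exists because the recompiler caches blocks keyed on the VIF state: if the mask register is irrelevant for a block, leftover junk in it would make otherwise identical blocks hash differently. A rough standalone sketch of the idea, with a hypothetical BlockKey type standing in for _vBlock:

#include <cstdint>

// Hypothetical cache key for a recompiled unpack block (illustrative only).
struct BlockKey {
	uint8_t  num, upkType, mode, cl, wl;
	uint32_t mask;
};

// Fields that do not influence the generated code must be normalized before
// the key is hashed or compared, otherwise stale register junk causes false
// cache misses and needless recompiles.
static void normalizeKey(BlockKey& k, bool doMask, uint32_t vifMask)
{
	// mask only matters when masking is on or the mode uses it
	k.mask = (doMask || (k.mode & 3)) ? vifMask : 0;
}

int main()
{
	BlockKey k = { 4, 0x0C, 0, 1, 1, 0xDEADBEEF }; // junk mask left behind by a game
	normalizeKey(k, false, 0xDEADBEEF);            // doMask off, mode 0 -> mask forced to 0
	return k.mask == 0 ? 0 : 1;
}

The same reasoning applies to any other field a block-cache key carries but the generated code ignores.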
@@ -281,10 +288,10 @@ _f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
	}
	static int recBlockNum = 0;
	DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
	DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
		_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
		doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
	);
	//DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
	//	_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
	//	doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
	//);

	xSetPtr(v.recPtr);
	_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
@@ -96,10 +96,10 @@ struct nVifStruct {

extern __aligned16 nVifStruct nVif[2];
extern __aligned16 const u8 nVifT[32];
extern __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
extern __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle]
extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]

static const bool useOldUnpack = false; // Use code in newVif_OldUnpack.inl
static const bool newVifDynaRec = true; // Use code in newVif_Dynarec.inl
static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl
static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl

#endif
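The dimension comments on nVifUpk describe a flat table indexed by USN, masking, unpack type, and current write cycle. A small sketch of that index math (upkIndex is only illustrative; the real code folds the masking bit into upkNum before indexing):

#include <cstddef>

// Flat layout implied by the comment above: [USN][Masking][UnpackType][curCycle],
// i.e. (2*2*16)*4 = 256 slots. Illustrative only; not the emulator's helper.
constexpr std::size_t upkIndex(int usn, int doMask, int upkType, int curCycle)
{
	return ((((usn * 2) + doMask) * 16 + upkType) * 4) + curCycle;
}

static_assert(upkIndex(0, 0, 0, 0) == 0,             "first slot");
static_assert(upkIndex(1, 1, 15, 3) == 2*2*16*4 - 1, "last slot");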
@@ -66,7 +66,7 @@ static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
	{{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> },
	 { _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },},
	{{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> },
	 { _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
	 { _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
};

// Unpacks until 1 normal write cycle unpack has been written to VU mem
@@ -74,7 +74,7 @@ static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = {
	{{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> },
	 { _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },},
	{{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> },
	 { _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
	 { _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
};
// ----------------------------------------------------------------------------
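UnpackLoopTable and UnpackSingleTable pick one of eight template instantiations of _nVifUnpackLoop by VIF index, mode, and fill flag, so those flags are compile-time constants inside the hot loop. A self-contained sketch of the same dispatch pattern (names are illustrative, not PCSX2 code):

#include <cstdint>
#include <cstdio>

// Illustrative version of a [idx][doMode][isFill] dispatch table: each flag
// combination gets its own instantiation, so the flags cost nothing per vector.
template<int idx, bool doMode, bool isFill>
static void unpackLoop(const uint8_t* data, uint32_t size)
{
	// the real loops unpack 'size' bytes into VU memory; this stub just reports
	(void)data;
	std::printf("unpackLoop<%d,%d,%d>(%u bytes)\n", idx, doMode, isFill, size);
}

using LoopFn = void (*)(const uint8_t*, uint32_t);

static const LoopFn loopTable[2][2][2] = {
	{{ unpackLoop<0,false,false>, unpackLoop<0,false,true> },
	 { unpackLoop<0,true, false>, unpackLoop<0,true, true> }},
	{{ unpackLoop<1,false,false>, unpackLoop<1,false,true> },
	 { unpackLoop<1,true, false>, unpackLoop<1,true, true> }},
};

int main()
{
	const uint8_t buf[16] = {};
	loopTable[1][0][1](buf, sizeof(buf));   // VIF1, no mode, fill mode
	return 0;
}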
@@ -98,71 +98,58 @@ static _f u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {

static _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
	pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
	ptr += amount;
	ptr += amount;
	vif->tag.addr += amount;
	int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000));
	if (diff >= 0) {
		ptr = (u8*)(vuMemBase + diff);
		DevCon.WriteLn("wrap!");
	}
}

static _f void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
	pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
	ptr += 16;
	if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) )
	ptr += 16;
	vif->tag.addr += 16;
	if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) ) {
		ptr -= (vuidx ? 0x4000 : 0x1000);
		DevCon.WriteLn("wrap!");
	}
}
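incVUptr and incVUptrBy16 now also advance vif->tag.addr and log when the destination pointer wraps past the end of VU data memory (4KB for VU0, 16KB for VU1, matching the 0x1000/0x4000 constants above). A standalone sketch of just the wrap arithmetic (advanceVUptr is a hypothetical name):

#include <cstdint>
#include <cassert>

// Wrap the destination pointer back into the VU data memory window once an
// unpack runs past its end. Illustrative only.
static uint8_t* advanceVUptr(uint8_t* ptr, uint8_t* vuMemBase, int vuidx, int amount)
{
	const int memSize = vuidx ? 0x4000 : 0x1000;   // VU1 : VU0 data memory size
	ptr += amount;
	int diff = static_cast<int>(ptr - (vuMemBase + memSize));
	if (diff >= 0)                                 // ran past the end -> wrap to base + overshoot
		ptr = vuMemBase + diff;
	return ptr;
}

int main()
{
	static uint8_t vu0mem[0x1000];
	uint8_t* p = advanceVUptr(vu0mem + 0x0FF0, vu0mem, 0, 0x20); // lands 16 bytes past the end
	assert(p == vu0mem + 0x10);                                  // wrapped to offset 0x10
	(void)p;
	return 0;
}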
int nVifUnpack(int idx, u8* data) {
	XMMRegisters::Freeze();
	nVifStruct& v = nVif[idx];
	vif = v.vif;
	vifRegs = v.vifRegs;
	int ret = aMin(vif->vifpacketsize, vif->tag.size);
	s32 size = ret << 2;
	const u8& vifT = nVifT[vif->cmd & 0xf];
	vif = v.vif;
	vifRegs = v.vifRegs;

	vif->tag.size -= ret;
	const int ret = aMin(vif->vifpacketsize, vif->tag.size);
	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
	s32 size = ret << 2;

	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

	if (v.partTransfer) { // Last transfer was a partial vector transfer...
	const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
	const u8 upkNum = vif->cmd & 0x1f;
	const int diff = vifT - v.partTransfer;
	if (v.partTransfer) { // Last transfer was a partial vector transfer...
		const u8& vifT = nVifT[vif->cmd & 0xf];
		const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
		const u8 upkNum = vif->cmd & 0x1f;
		const int diff = vifT - v.partTransfer;
		memcpy(&v.partBuffer[v.partTransfer], data, diff);
		UnpackSingleTable[idx][doMode][isFill]( v.partBuffer, size );
		UnpackSingleTable[idx][doMode][isFill](v.partBuffer, size);
		//DevCon.WriteLn("Diff = %d; vifT = %d; size = %d", diff, vifT, size);
		data += diff;
		size -= diff;
		vif->tag.addr += 16;
		v.partTransfer = 0;
		//DevCon.WriteLn("Diff = %d", diff);
		v.partTransfer = 0;
	}

	if (ret == v.vif->tag.size) { // Full Transfer
		dVifUnpack(idx, data, size, isFill);
		vif->tag.size = 0;
		vif->cmd = 0;
	}
	else { // Partial Transfer
		_nVifUnpack(idx, data, size, isFill);
		vif->tag.size -= ret;
	}

	u32 oldNum = vifRegs->num;

	if (size > 0) {
		if (newVifDynaRec) dVifUnpack(idx, data, size, isFill);
		else _nVifUnpack(idx, data, size, isFill);
	}

	u32 s =(size/vifT) * vifT;
	u32 d = size - s;
	s32 temp = oldNum * vifT - s; // ToDo: Handle filling write partial logic

	if (temp > 0) { // Current transfer is partial
		if (d > 0) { // Partial Vector Transfer
			//DevCon.WriteLn("partial transfer!");
			memcpy(v.partBuffer, &((u8*)data)[s], d);
			v.partTransfer = d;
		}
		vifRegs->num += temp / vifT;
		vif->tag.addr +=(oldNum - vifRegs->num) * 16;
	}

	if (vif->tag.size <= 0) {
		vif->tag.size = 0;
		vif->cmd = 0;
	}
	XMMRegisters::Thaw();
	return ret;
}
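The rewritten nVifUnpack runs the unpack over whatever data arrived and then reconstructs how much of the last vector is still missing, buffering the fragment in partBuffer and giving the unwritten vectors back to vifRegs->num. A worked sketch of the s/d/temp arithmetic (splitTransfer is a hypothetical helper; 12 bytes per vector corresponds to a V3-32 unpack):

#include <cstdio>

// Hypothetical walk-through of the partial-transfer bookkeeping above: given
// the byte size of one unpacked vector (vifT) and the bytes actually received
// this packet, split the data into whole vectors plus a leftover fragment that
// has to be buffered until the next packet arrives.
struct PartialInfo {
	unsigned wholeBytes;   // s: bytes forming complete vectors
	unsigned leftover;     // d: trailing bytes of an incomplete vector
	int      vectorsShort; // temp/vifT: vectors that could not be written yet
};

static PartialInfo splitTransfer(unsigned size, unsigned vifT, unsigned numRequested)
{
	PartialInfo p;
	p.wholeBytes   = (size / vifT) * vifT;
	p.leftover     = size - p.wholeBytes;
	p.vectorsShort = ((int)(numRequested * vifT) - (int)p.wholeBytes) / (int)vifT;
	return p;
}

int main()
{
	// e.g. V3-32 vectors are 12 bytes; 10 vectors requested but only 40 bytes arrived:
	PartialInfo p = splitTransfer(40, 12, 10);
	std::printf("whole=%u leftover=%u short=%d\n", p.wholeBytes, p.leftover, p.vectorsShort); // 36, 4, 7
	return 0;
}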
@@ -176,12 +163,12 @@ static void setMasks(int idx, const VIFregisters& v) {
				nVifMask[1][i/4][i%4] = 0;
				nVifMask[2][i/4][i%4] = 0;
				break;
			case 1: // Row
			case 1: // Row // todo: use g_vifmask
				nVifMask[0][i/4][i%4] = 0;
				nVifMask[1][i/4][i%4] = 0;
				nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
				break;
			case 2: // Col
			case 2: // Col // todo: use g_vifmask
				nVifMask[0][i/4][i%4] = 0;
				nVifMask[1][i/4][i%4] = 0;
				nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
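setMasks walks the sixteen per-element fields of the VIF MASK register (one per element of each of the four write cycles) and bakes them into the nVifMask lookup used by the SSE path. A rough sketch of that per-element decode (illustrative, assuming the usual 2-bits-per-field MASK layout; not the emulator's code):

#include <cstdint>
#include <cstdio>

// Each element of each write cycle gets a 2-bit mode from the VIF MASK register:
// 0 = write unpacked data, 1 = write the row register, 2 = write the column
// register, 3 = write-protect.
enum MaskMode { MASK_DATA = 0, MASK_ROW = 1, MASK_COL = 2, MASK_WRITEPROTECT = 3 };

static MaskMode maskModeFor(uint32_t mask, int cycle, int vectorElem)
{
	int field = cycle * 4 + vectorElem;             // 4 cycles x 4 elements = 16 fields
	return static_cast<MaskMode>((mask >> (field * 2)) & 3);
}

int main()
{
	const uint32_t mask = 0x0000FFE4;               // arbitrary example value
	for (int i = 0; i < 16; i++)
		std::printf("cycle %d elem %d -> mode %d\n", i / 4, i % 4, maskModeFor(mask, i / 4, i % 4));
	return 0;
}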
@@ -226,27 +213,17 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {

	const int usn = !!(vif->usn);
	const int upkNum = vif->cmd & 0x1f;
	//const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.

	// Recompiled Unpacker, used when doMode is false.
	// Did a bunch of work to make it so I could optimize this index lookup to outside
	// the main loop but it was for naught -- too often the loop is only 1-2 iterations,
	// so this setup code ends up being slower (1 iter) or same speed (2 iters).
	//const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.

	const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];

	// Interpreted Unpacker, used if doMode is true OR if isFill is true. Lookup is
	// always performed for now, due to ft.gsize reference (seems faster than using
	// nVifT for now)
	const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
	UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;

	// Cache vuMemBase to a local var because the VU1's is a dereferenced pointer that
	// mucks up compiler optimizations on the internal loops. >_< --air

	const u8* vuMemBase = (idx ? VU1 : VU0).Mem;
	u8* dest = setVUptr(idx, vuMemBase, vif->tag.addr);
	if (vif->cl >= blockSize) vif->cl = 0;

	while (vifRegs->num) {
	while (vifRegs->num && (size >= ft.gsize)) {
		if (vif->cl < cycleSize) {
			if (doMode) {
				//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
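The important change to the loop head is the added size >= ft.gsize guard: each unpack format consumes a fixed number of source bytes per vector, and the loop must not start a vector it cannot finish, since the remainder has to be carried over as a partial transfer. A simplified standalone model of that guarded loop (unpackVectors is a hypothetical name):

#include <cstdint>
#include <cstdio>

// Simplified model of the guarded per-vector loop above (illustrative only):
// consume gsize source bytes per vector and stop as soon as a complete vector
// no longer fits, leaving the tail for the partial-transfer path.
static unsigned unpackVectors(const uint8_t* data, unsigned size, unsigned gsize, unsigned& num)
{
	(void)data;                                  // real code would write vectors to VU memory
	unsigned consumed = 0;
	while (num && (size - consumed) >= gsize) {  // never read a truncated vector
		consumed += gsize;
		num--;
	}
	return consumed;                             // caller buffers data+consumed .. data+size
}

int main()
{
	uint8_t packet[30] = {};
	unsigned num  = 5;                           // 5 vectors requested, 12 bytes each (V3-32)
	unsigned used = unpackVectors(packet, sizeof(packet), 12, num);
	std::printf("consumed %u bytes, %u vectors still pending\n", used, num); // 24 bytes, 3 pending
	return 0;
}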
@@ -257,10 +234,11 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
				fnbase[aMin(vif->cl, 3)](dest, data);
			}
			data += ft.gsize;
			size -= ft.gsize;
			vifRegs->num--;
			incVUptrBy16(idx, dest, vuMemBase);
			if (++vif->cl == blockSize) vif->cl = 0;
			if (singleUnpack) break;
			if (singleUnpack) return;
		}
		else if (isFill) {
			//DevCon.WriteLn("isFill!");
@@ -274,6 +252,12 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
			vif->cl = 0;
		}
	}

	if (vifRegs->num && ((s32)size > 0)) { // Partial Vector Transfer
		//DevCon.WriteLn("partial transfer! [%d]", size);
		memcpy(nVif[idx].partBuffer, data, size);
		nVif[idx].partTransfer = size;
	}
}

_f void _nVifUnpack(int idx, u8 *data, u32 size, bool isFill) {