newVif: Fixed Tekken 5 while keeping games like Gradius 5 working.

Hopefully this fixes the rest of the problems newVif was having with games.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2429 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2010-01-15 07:43:52 +00:00
parent b1ef60c0f7
commit cbc043156d
3 changed files with 70 additions and 79 deletions

View File

@@ -165,7 +165,7 @@ static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modR
 }
 static bool UsesTwoRegs[] =
 {
     true,  true,  true,  true,
     false, false, false, false,
     false, false, false, false,
     false, false, false, true,
@@ -221,15 +221,22 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
     }
     if (doMode==2) writeBackRow();
     xMOV(ptr32[&v.vif->cl], vCL);
     xMOV(ptr32[&v.vifRegs->num], vNum);
     xRET();
 }
-static _f u8* dVifsetVUptr(const nVifStruct& v, int offset) {
-    u8* ptr    = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
-    u8* endPtr = ptr + _vBlock.num * 16;
-    if (endPtr > v.vuMemEnd) {
+static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
+    u8* endPtr; // Check if we need to wrap around VU memory
+    u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));
+    if (!isFill) { // Account for skip-cycles
+        int skipSize = cl - wl;
+        int blocks   = _vBlock.num / wl;
+        int skips    = (blocks * skipSize + _vBlock.num) * 16;
+        endPtr       = ptr + skips;
+    }
+    else endPtr = ptr + (_vBlock.num * 16);
+    if ( endPtr >= v.vuMemEnd ) {
         DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
         ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
     }
@@ -266,10 +273,10 @@ _f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
     // Zero out the mask parameter if it's unused -- games leave random junk
     // values here which cause false recblock cache misses.
-    _vBlock.mask = (doMask || ((_vBlock.mode&3)!=0) ) ? v.vifRegs->mask : 0x00;
+    _vBlock.mask = (doMask || (_vBlock.mode&3)) ? v.vifRegs->mask : 0;
     if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
-        if( u8* dest = dVifsetVUptr(v, v.vif->tag.addr) ) {
+        if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
             //DevCon.WriteLn("Running Recompiled Block!");
             ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
         }
@@ -281,10 +288,10 @@ _f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
     }
     static int recBlockNum = 0;
     DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
-    DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
-        _vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
-        doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
-    );
+    //DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
+    //    _vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
+    //    doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
+    //);
     xSetPtr(v.recPtr);
     _vBlock.startPtr = (uptr)xGetAlignedCallTarget();
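A note on the dVifsetVUptr change above: with a skipping write (cl > wl), every group of wl written vectors is followed by cl - wl skipped ones, so a block of _vBlock.num writes spans more VU memory than num * 16 bytes, and the overflow check has to account for that before handing the block to the recompiled unpacker. A minimal stand-alone sketch of the same span calculation (illustrative names, not PCSX2 code):

    #include <cstdio>

    // Hypothetical model of the skip-cycle span math: with cl > wl, every wl written
    // vectors are followed by (cl - wl) skipped vectors, so num writes cover
    // (num/wl * (cl - wl) + num) * 16 bytes of VU memory.
    static unsigned spanBytes(unsigned num, unsigned cl, unsigned wl, bool isFill) {
        if (isFill || wl == 0) return num * 16; // filling write: nothing is skipped
        unsigned skipSize = cl - wl;            // vectors skipped per write group
        unsigned blocks   = num / wl;           // complete write groups
        return (blocks * skipSize + num) * 16;  // written + skipped vectors, in bytes
    }

    int main() {
        // num=12, cl=4, wl=2: six groups, two skipped vectors each -> (12 + 12) * 16 = 384
        std::printf("%u\n", spanBytes(12, 4, 2, false));
        return 0;
    }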

View File

@@ -96,10 +96,10 @@ struct nVifStruct {
 extern __aligned16 nVifStruct nVif[2];
 extern __aligned16 const u8 nVifT[32];
-extern __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
+extern __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle]
 extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
-static const bool useOldUnpack = false; // Use code in newVif_OldUnpack.inl
-static const bool newVifDynaRec = true; // Use code in newVif_Dynarec.inl
+static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl
+static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl
 #endif

View File

@@ -66,7 +66,7 @@ static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
     {{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> },
      { _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },},
     {{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> },
      { _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
 };
 // Unpacks until 1 normal write cycle unpack has been written to VU mem
@@ -74,7 +74,7 @@ static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = {
     {{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> },
      { _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },},
     {{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> },
      { _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
 };
 // ----------------------------------------------------------------------------
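The UnpackLoopTable / UnpackSingleTable context above illustrates the dispatch pattern newVif uses: runtime flags (idx, doMode, isFill, singleUnpack) index a table of functions instantiated with those flags as compile-time template parameters. A reduced sketch of the same idea, with three flags and illustrative names rather than the real _nVifUnpackLoop signature:

    #include <cstdio>

    // Each entry is a distinct instantiation, so the flag tests inside the real loop
    // compile away; the runtime cost is a single table lookup.
    template<int idx, int doMode, int isFill>
    static void unpackLoop() {
        std::printf("idx=%d doMode=%d isFill=%d\n", idx, doMode, isFill);
    }

    typedef void (*LoopFn)();

    static const LoopFn loopTable[2][2][2] = {
        {{ unpackLoop<0,0,0>, unpackLoop<0,0,1> },
         { unpackLoop<0,1,0>, unpackLoop<0,1,1> }},
        {{ unpackLoop<1,0,0>, unpackLoop<1,0,1> },
         { unpackLoop<1,1,0>, unpackLoop<1,1,1> }},
    };

    int main() {
        bool idx = true, doMode = false, isFill = true;
        loopTable[idx][doMode][isFill](); // calls unpackLoop<1,0,1>
        return 0;
    }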
@@ -98,71 +98,58 @@ static _f u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {
 static _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
     pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
     ptr += amount;
+    vif->tag.addr += amount;
     int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000));
     if (diff >= 0) {
         ptr = (u8*)(vuMemBase + diff);
+        DevCon.WriteLn("wrap!");
     }
 }
 static _f void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
     pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
     ptr += 16;
-    if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) )
+    vif->tag.addr += 16;
+    if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) ) {
         ptr -= (vuidx ? 0x4000 : 0x1000);
+        DevCon.WriteLn("wrap!");
+    }
 }
 int nVifUnpack(int idx, u8* data) {
     XMMRegisters::Freeze();
     nVifStruct& v = nVif[idx];
     vif     = v.vif;
     vifRegs = v.vifRegs;
-    int ret = aMin(vif->vifpacketsize, vif->tag.size);
-    s32 size = ret << 2;
-    const u8& vifT = nVifT[vif->cmd & 0xf];
-    vif->tag.size -= ret;
-    const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
+    const int  ret    = aMin(vif->vifpacketsize, vif->tag.size);
+    const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
+    s32 size = ret << 2;
     if (v.partTransfer) { // Last transfer was a partial vector transfer...
+        const u8&  vifT   = nVifT[vif->cmd & 0xf];
         const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
         const u8   upkNum = vif->cmd & 0x1f;
         const int  diff   = vifT - v.partTransfer;
         memcpy(&v.partBuffer[v.partTransfer], data, diff);
-        UnpackSingleTable[idx][doMode][isFill]( v.partBuffer, size );
+        UnpackSingleTable[idx][doMode][isFill](v.partBuffer, size);
+        //DevCon.WriteLn("Diff = %d; vifT = %d; size = %d", diff, vifT, size);
         data += diff;
         size -= diff;
-        vif->tag.addr += 16;
         v.partTransfer = 0;
-        //DevCon.WriteLn("Diff = %d", diff);
     }
-    u32 oldNum = vifRegs->num;
-    if (size > 0) {
-        if (newVifDynaRec) dVifUnpack(idx, data, size, isFill);
-        else              _nVifUnpack(idx, data, size, isFill);
-    }
-    u32 s =(size/vifT) * vifT;
-    u32 d = size - s;
-    s32 temp = oldNum * vifT - s; // ToDo: Handle filling write partial logic
-    if (temp > 0) { // Current transfer is partial
-        if (d > 0) { // Partial Vector Transfer
-            //DevCon.WriteLn("partial transfer!");
-            memcpy(v.partBuffer, &((u8*)data)[s], d);
-            v.partTransfer = d;
-        }
-        vifRegs->num += temp / vifT;
-        vif->tag.addr +=(oldNum - vifRegs->num) * 16;
-    }
-    if (vif->tag.size <= 0) {
-        vif->tag.size = 0;
-        vif->cmd = 0;
-    }
+    if (ret == v.vif->tag.size) { // Full Transfer
+        dVifUnpack(idx, data, size, isFill);
+        vif->tag.size = 0;
+        vif->cmd = 0;
+    }
+    else { // Partial Transfer
+        _nVifUnpack(idx, data, size, isFill);
+        vif->tag.size -= ret;
+    }
     XMMRegisters::Thaw();
     return ret;
 }
@@ -176,12 +163,12 @@ static void setMasks(int idx, const VIFregisters& v) {
             nVifMask[1][i/4][i%4] = 0;
             nVifMask[2][i/4][i%4] = 0;
             break;
-        case 1: // Row
+        case 1: // Row // todo: use g_vifmask
             nVifMask[0][i/4][i%4] = 0;
             nVifMask[1][i/4][i%4] = 0;
             nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
             break;
-        case 2: // Col
+        case 2: // Col // todo: use g_vifmask
             nVifMask[0][i/4][i%4] = 0;
             nVifMask[1][i/4][i%4] = 0;
             nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
@@ -226,27 +213,17 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
     const int usn = !!(vif->usn);
     const int upkNum = vif->cmd & 0x1f;
     //const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.
-    // Recompiled Unpacker, used when doMode is false.
-    // Did a bunch of work to make it so I could optimize this index lookup to outside
-    // the main loop but it was for naught -- too often the loop is only 1-2 iterations,
-    // so this setup code ends up being slower (1 iter) or same speed (2 iters).
     const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
-    // Interpreted Unpacker, used if doMode is true OR if isFill is true. Lookup is
-    // always performed for now, due to ft.gsize reference (seems faster than using
-    // nVifT for now)
     const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
     UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
-    // Cache vuMemBase to a local var because the VU1's is a dereferenced pointer that
-    // mucks up compiler optimizations on the internal loops. >_< --air
     const u8* vuMemBase = (idx ? VU1 : VU0).Mem;
     u8* dest = setVUptr(idx, vuMemBase, vif->tag.addr);
     if (vif->cl >= blockSize) vif->cl = 0;
-    while (vifRegs->num) {
+    while (vifRegs->num && (size >= ft.gsize)) {
         if (vif->cl < cycleSize) {
             if (doMode) {
                 //DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
@@ -257,10 +234,11 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
                 fnbase[aMin(vif->cl, 3)](dest, data);
             }
             data += ft.gsize;
+            size -= ft.gsize;
             vifRegs->num--;
             incVUptrBy16(idx, dest, vuMemBase);
             if (++vif->cl == blockSize) vif->cl = 0;
-            if (singleUnpack) break;
+            if (singleUnpack) return;
         }
         else if (isFill) {
             //DevCon.WriteLn("isFill!");
@@ -274,6 +252,12 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
             vif->cl = 0;
         }
     }
+    if (vifRegs->num && ((s32)size > 0)) { // Partial Vector Transfer
+        //DevCon.WriteLn("partial transfer! [%d]", size);
+        memcpy(nVif[idx].partBuffer, data, size);
+        nVif[idx].partTransfer = size;
+    }
 }
 _f void _nVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
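Taken together, the nVifUnpack and _nVifUnpackLoop changes above mean only full transfers reach the recompiled path, while partial packets run through the interpreter loop, which stops once less than a whole vector of source data remains and stashes the tail in partBuffer so the next packet can complete it. A rough sketch of that buffering pattern, with illustrative names rather than the real nVifStruct fields:

    #include <cstring>
    #include <cstdint>

    // Hypothetical model of the partial-vector buffering; sizes and names are
    // illustrative, not the actual PCSX2 structures.
    struct PartialState {
        uint8_t buffer[64];  // bytes of one incomplete source vector
        int     pending = 0; // how many bytes were stashed from the previous packet
    };

    // Finish a vector that was split across packets; returns bytes consumed from 'data'.
    static int resumePartial(PartialState& st, const uint8_t* data, int vectorSize) {
        if (!st.pending) return 0;
        int need = vectorSize - st.pending;              // bytes still missing
        std::memcpy(st.buffer + st.pending, data, need);
        // ...unpack st.buffer as one complete vector here...
        st.pending = 0;
        return need;
    }

    // Stash a trailing fragment that the unpack loop could not consume this packet.
    static void stashTail(PartialState& st, const uint8_t* data, int remaining) {
        std::memcpy(st.buffer, data, remaining);
        st.pending = remaining;
    }

    int main() {
        PartialState st;
        const uint8_t packet1[6] = {1, 2, 3, 4, 5, 6};   // ends mid-vector (vector size 8)
        stashTail(st, packet1, 6);
        const uint8_t packet2[8] = {7, 8, 9, 10, 11, 12, 13, 14};
        int used = resumePartial(st, packet2, 8);        // consumes 2 bytes to finish it
        return used == 2 ? 0 : 1;
    }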