Optimized and split up the unpack call a bit so that fewer checks are run, which should bring an overall speed increase. Also got rid of some duplicate pointer rubbish which was all over the place.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
refraction 2009-04-11 17:45:14 +00:00
parent a388d2969c
commit 4df3f80d30
5 changed files with 331 additions and 299 deletions

View File

@ -25,10 +25,10 @@
#include "Vif.h"
#include "VifDma.h"
VIFregisters *_vifRegs;
u32* _vifRow = NULL, *_vifCol = NULL;
u32* _vifMaskRegs = NULL;
vifStruct *_vif;
VIFregisters *vifRegs;
u32* vifRow = NULL, *vifCol = NULL;
u32* vifMaskRegs = NULL;
vifStruct *vif;
PCSX2_ALIGNED16(u32 g_vifRow0[4]);
PCSX2_ALIGNED16(u32 g_vifCol0[4]);
@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
int n;
u32 vifRowReg = getVifRowRegs(offnum);
if (_vifRegs->code & 0x10000000)
if (vifRegs->code & 0x10000000)
{
switch (_vif->cl)
switch (vif->cl)
{
case 0:
if (offnum == OFFSET_X)
n = (_vifRegs->mask) & 0x3;
n = (vifRegs->mask) & 0x3;
else
n = (_vifRegs->mask >> (offnum * 2)) & 0x3;
n = (vifRegs->mask >> (offnum * 2)) & 0x3;
break;
case 1:
n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
break;
case 2:
n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
break;
default:
n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
break;
}
}
@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
switch (n)
{
case 0:
if ((_vif->cmd & 0x6F) == 0x6f)
if ((vif->cmd & 0x6F) == 0x6f)
{
dest = data;
}
else switch (_vifRegs->mode)
else switch (vifRegs->mode)
{
case 1:
dest = data + vifRowReg;
@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
dest = vifRowReg;
break;
case 2:
dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl);
dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
break;
case 3:
//Masked so don't do anything
break;
}
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data);
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
}
template <class T>
@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size)
// UNPACK_V2 as shown in this diff hunk: every changed statement appears twice,
// first with the old `_vifRegs` global and then with its renamed replacement
// `vifRegs`.
//
// Steps through the X -> Y -> Z -> W write position stored in vifRegs->offset,
// passing one destination word per step to writeXYZW() and resetting the
// offset to OFFSET_X after the W step.
//   dest - destination words; advanced after the X, Y and Z writes.
//   data - source elements of type T; advanced only after the X write.
//   size - source elements remaining; checked only by the X and Y steps.
template <class T>
void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
{
// X step: consume one source element, then move on to Y.
if (_vifRegs->offset == OFFSET_X)
if (vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Y;
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
// Y step: writes *data but leaves the source pointer where it is.
if (_vifRegs->offset == OFFSET_Y)
if (vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data);
_vifRegs->offset = OFFSET_Z;
writeXYZW(vifRegs->offset, *dest++, *data);
vifRegs->offset = OFFSET_Z;
size--;
}
}
// Z step: runs regardless of size and takes no new source element.
// NOTE(review): `*dest-2` parses as `(*dest) - 2`, and `dest` is also modified
// by `*dest++` in the same argument list - confirm whether `*(dest - 2)`
// (the word written two steps earlier) was intended.
if (_vifRegs->offset == OFFSET_Z)
if (vifRegs->offset == OFFSET_Z)
{
writeXYZW(_vifRegs->offset, *dest++, *dest-2);
_vifRegs->offset = OFFSET_W;
writeXYZW(vifRegs->offset, *dest++, *dest-2);
vifRegs->offset = OFFSET_W;
}
// W step: runs regardless of size, writes *data, then wraps back to X.
if (_vifRegs->offset == OFFSET_W)
if (vifRegs->offset == OFFSET_W)
{
writeXYZW(_vifRegs->offset, *dest, *data);
_vifRegs->offset = OFFSET_X;
writeXYZW(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
// UNPACK_V3 as shown in this diff hunk: every changed statement appears twice,
// first with the old `_vifRegs` global and then with its renamed replacement
// `vifRegs`.
//
// Steps through the X -> Y -> Z -> W write position stored in vifRegs->offset.
// The X, Y and Z steps each consume one source element (only while size > 0);
// the W step always runs and resets the offset to OFFSET_X.
//   dest - destination words; advanced after the X, Y and Z writes.
//   data - source elements of type T; advanced after the X, Y and Z writes.
//   size - source elements remaining; not checked by the W step.
template <class T>
void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
{
// X step.
if(_vifRegs->offset == OFFSET_X)
if(vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Y;
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
// Y step.
if(_vifRegs->offset == OFFSET_Y)
if(vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Z;
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Z;
size--;
}
}
// Z step.
if(_vifRegs->offset == OFFSET_Z)
if(vifRegs->offset == OFFSET_Z)
{
if (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_W;
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_W;
size--;
}
}
// W step: writes whatever *data currently points at, even when size is 0.
if(_vifRegs->offset == OFFSET_W)
if(vifRegs->offset == OFFSET_W)
{
// V3-# does something bizarre with alignment: every 6 qwords of data the W becomes 0 (strange console!).
// Ape Escape doesn't seem to like that, though; this still needs investigating.
writeXYZW(_vifRegs->offset, *dest, *data);
_vifRegs->offset = OFFSET_X;
writeXYZW(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
{
while (size > 0)
{
writeXYZW(_vifRegs->offset, *dest++, *data++);
_vifRegs->offset++;
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset++;
size--;
}
if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X;
if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
}
void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
return ret;
}
static __forceinline int mfifoVIF1chain()
static __forceinline int mfifo_VIF1chain()
{
int ret;
@ -531,7 +530,7 @@ void vifMFIFOInterrupt()
{
g_vifCycles = 0;
if (vif1.inprogress == 1) mfifoVIF1chain();
if (vif1.inprogress == 1) mfifo_VIF1chain();
if (vif1.irq && vif1.tag.size == 0)
{

View File

@ -81,9 +81,9 @@ struct VIFregisters {
// Shared VIF state pointers, declared with C linkage so the assembly unpack
// routines can reference them by their plain symbol names.
// In this diff hunk each renamed declaration appears twice: the old `_vif*`
// spelling followed by the new `vif*` spelling.
extern "C"
{
// these use cdecl for Asm code references.
extern VIFregisters *_vifRegs;
extern u32* _vifMaskRegs;
extern u32* _vifRow;
extern VIFregisters *vifRegs;
extern u32* vifMaskRegs;
extern u32* vifRow;
// NOTE(review): `_vifCol` is the only declaration here without a renamed
// counterpart, while the definition in this same commit changes from
// `_vifCol` to `vifCol` - confirm whether this line should read `vifCol`.
extern u32* _vifCol;
}
@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
switch (reg)
{
case 0:
_vifRegs->r0 = data;
vifRegs->r0 = data;
break;
case 1:
_vifRegs->r1 = data;
vifRegs->r1 = data;
break;
case 2:
_vifRegs->r2 = data;
vifRegs->r2 = data;
break;
case 3:
_vifRegs->r3 = data;
vifRegs->r3 = data;
break;
jNO_DEFAULT;
}
@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg)
switch (reg)
{
case 0:
return _vifRegs->r0;
return vifRegs->r0;
break;
case 1:
return _vifRegs->r1;
return vifRegs->r1;
break;
case 2:
return _vifRegs->r2;
return vifRegs->r2;
break;
case 3:
return _vifRegs->r3;
return vifRegs->r3;
break;
jNO_DEFAULT;
}
@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data)
switch (reg)
{
case 0:
_vifRegs->c0 = data;
vifRegs->c0 = data;
break;
case 1:
_vifRegs->c1 = data;
vifRegs->c1 = data;
break;
case 2:
_vifRegs->c2 = data;
vifRegs->c2 = data;
break;
case 3:
_vifRegs->c3 = data;
vifRegs->c3 = data;
break;
jNO_DEFAULT;
}
@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg)
switch (reg)
{
case 0:
return _vifRegs->c0;
return vifRegs->c0;
break;
case 1:
return _vifRegs->c1;
return vifRegs->c1;
break;
case 2:
return _vifRegs->c2;
return vifRegs->c2;
break;
case 3:
return _vifRegs->c3;
return vifRegs->c3;
break;
jNO_DEFAULT;
}

View File

@ -37,10 +37,10 @@ using namespace std; // for min / max
// C-linkage declarations of the shared VIF state pointers.
// In this diff hunk the four old `_vif*` declarations are listed first,
// followed by their renamed `vif*` replacements.
extern "C"
{
// Need cdecl on these for ASM references.
// (The assembly files in this commit declare vifRegs, vifMaskRegs and vifRow
// as externals.)
extern VIFregisters *_vifRegs;
extern u32* _vifMaskRegs;
extern u32* _vifRow;
extern u32* _vifCol;
extern VIFregisters *vifRegs;
extern u32* vifMaskRegs;
extern u32* vifRow;
extern u32* vifCol;
}
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);
@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);
extern vifStruct *_vif;
extern vifStruct *vif;
vifStruct vif0, vif1;
@ -254,20 +254,8 @@ __forceinline static int _limit(int a, int max)
static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
{
const VIFUnpackFuncTable *unpack;
vifStruct *vif;
VIFregisters *vifRegs;
unpack = &VIFfuncTable[ unpackType ];
if (VIFdmanum == 0)
{
vif = &vif0;
vifRegs = vif0Regs;
}
else
{
vif = &vif1;
vifRegs = vif1Regs;
}
unpack = &VIFfuncTable[ unpackType ];
switch (unpackType)
{
@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
}
//This is sorted out later
vif->tag.addr &= ~0xf;
if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
{
VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
}
}
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{
u32 *dest;
u32 unpackType;
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
vifStruct *vif;
VIFregisters *vifRegs;
VURegs * VU;
u8 *cdata = (u8*)data;
#ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0)
{
VU = &VU0;
vif = &vif0;
vifRegs = vif0Regs;
assert(v->addr < memsize);
}
else
{
VU = &VU1;
vif = &vif1;
vifRegs = vif1Regs;
assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
}
dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
#ifdef _DEBUG
if (v->size != size)
{
VIF_LOG("*PCSX2*: warning v->size != size");
}
if ((v->addr + size*4) > memsize)
{
Console::Notice("*PCSX2*: fixme unpack overflow");
Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
}
#endif
// The unpack type
unpackType = v->cmd & 0xf;
if (size == 0)
{
VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
}
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
_vifRegs = (VIFregisters*)vifRegs;
_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
_vif = vif;
_vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
ft = &VIFfuncTable[ unpackType ];
func = _vif->usn ? ft->funcU : ft->funcS;
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
memsize = size;
#endif
if (v->size != (size >> 2))
ProcessMemSkip(size, unpackType, VIFdmanum);
if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000))
if(vif1Regs->offset != 0)
{
//Sanity Check (memory overflow)
DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
}
if (_vifRegs->offset > 0)
{
int destinc, unpacksize;
int unpacksize;
//This is just to make sure the alignment isnt loopy on a split packet
if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
{
DevCon::Error("Warning: Unpack alignment error");
}
@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
// SSE doesn't handle such small data
if (vifRegs->offset < (u32)ft->qsize)
if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
{
if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
{
Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
}
DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
}
unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
}
else
{
unpacksize = 0;
Console::WriteLn("Unpack align offset = 0");
}
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
destinc = (4 - ft->qsize) + unpacksize;
func(dest, (u32*)cdata, unpacksize);
size -= unpacksize * ft->dsize;
cdata += unpacksize * ft->dsize;
vifRegs->num--;
++vif->cl;
if (vif->cl == vifRegs->cycle.wl)
{
if (vifRegs->cycle.cl != vifRegs->cycle.wl)
dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
{
vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
//dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
}
else
dest += destinc;
{
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
//dest += destinc;
}
vif->cl = 0;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
return size >> 2;
}
else
{
dest += destinc;
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
dest += (4 - ft->qsize) + unpacksize;
cdata += unpacksize * ft->dsize;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
}
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
}
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
if (vif->cl != 0) //Check alignment for SSE unpacks
{
#ifdef _DEBUG
@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
int incdest;
if (vif->cl != 0)
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{
// continuation from last stream
@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (vif->cl == vifRegs->cycle.wl)
{
dest += incdest;
vif->tag.addr += incdest * 4;
vif->cl = 0;
break;
}
dest += 4;
vif->tag.addr += 16;
}
// have to update
_vifRow[0] = _vifRegs->r0;
_vifRow[1] = _vifRegs->r1;
_vifRow[2] = _vifRegs->r2;
_vifRow[3] = _vifRegs->r3;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
}
}
return size>>2;
}
if ((size >= ft->gsize) && !(v->addr&0xf))
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{
u32 *dest;
u32 unpackType;
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
VURegs * VU;
u8 *cdata = (u8*)data;
#ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0)
{
VU = &VU0;
//vifRegs = vif0Regs;
assert(v->addr < memsize);
}
else
{
VU = &VU1;
//vifRegs = vif1Regs;
assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
}
dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
// The unpack type
unpackType = v->cmd & 0xf;
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
ft = &VIFfuncTable[ unpackType ];
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
#ifdef _DEBUG
memsize = size;
#endif
#ifdef VIFUNPACKDEBUG
if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
{
//Sanity Check (memory overflow)
DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
}
#endif
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{
#ifdef _DEBUG
static int s_count = 0;
#endif
if (size >= ft->gsize)
{
const UNPACKPARTFUNCTYPESSE* pfn;
int writemask;
@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;
if(vifRegs->mode == 2)
{
//Update the reg rows for non SSE
vifRegs->r0 = vifRow[0];
vifRegs->r1 = vifRow[1];
vifRegs->r2 = vifRow[2];
vifRegs->r3 = vifRow[3];
}
// if size is left over, update the src,dst pointers
if (writemask > 0)
{
@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
cdata += left * ft->gsize;
dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
vifRegs->num -= left;
_vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = writemask;
if (size >= ft->dsize && vifRegs->num > 0)
{
//VIF_LOG("warning, end with size = %d", size);
/* unpack one qword */
vif->tag.addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
}
}
else
{
vifRegs->num -= size / ft->gsize;
if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = 0;
}
size = writemask;
_vifRegs->r0 = _vifRow[0];
_vifRegs->r1 = _vifRow[1];
_vifRegs->r2 = _vifRow[2];
_vifRegs->r3 = _vifRow[3];
}
else
{
if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available
{
// v4-32
if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
{
vifRegs->num -= size >> 4;
memcpy_fast((u8*)dest, cdata, size);
size = 0;
return;
}
}
incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
while ((size >= ft->gsize) && (vifRegs->num > 0))
{
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
size -= ft->gsize;
vifRegs->num--;
//if(vifRegs->num == loophere) dest = (u32*)(VU->Mem);
++vif->cl;
if (vif->cl == vifRegs->cycle.wl)
{
dest += incdest;
vif->cl = 0;
}
else
{
dest += 4;
}
}
// have to update
_vifRow[0] = _vifRegs->r0;
_vifRow[1] = _vifRegs->r1;
_vifRow[2] = _vifRegs->r2;
_vifRow[3] = _vifRegs->r3;
}
// used for debugging vif
// {
// int i, j, k;
// u32* curdest = olddest;
// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
//
// for(i = 0; i < memsize; ) {
// for(k = 0; k < vifRegs->cycle.wl; ++k) {
// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
// fprintf(ftemp, "%x ", curdest[4*k+j]);
// }
// }
//
// fprintf(ftemp, "\n");
// curdest += 4*vifRegs->cycle.cl;
// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
// }
// fclose(ftemp);
// }
// s_count++;
if (size >= ft->dsize && vifRegs->num > 0)
else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
{
//VIF_LOG("warning, end with size = %d", size);
@ -680,13 +666,19 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
}
}
else /* filling write */
{
VIF_LOG("VIFunpack - filling write");
if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data)
vif0.tag.addr &= 0xfff;
vif0.tag.size = len;
vif0Regs->offset = 0;
vifRegs = (VIFregisters*)vif0Regs;
vifMaskRegs = g_vif0Masks;
vif = &vif0;
vifRow = g_vifRow0;
}
static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size)
static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
{
/* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size);
/* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
{
FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f);
@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{
if (vif0.vifpacketsize < vif0.tag.size)
{
_vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
vif0.tag.addr += vif0.vifpacketsize << 2;
vif0.tag.size -= vif0.vifpacketsize;
return vif0.vifpacketsize;
@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{
int ret;
_vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
ret = vif0.tag.size;
vif0.tag.size = 0;
vif0.cmd = 0;
@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
{
/* size is less that the total size, transfer is 'in pieces' */
VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);
ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);
vif0.tag.size -= vif0.vifpacketsize;
FreezeXMMRegs(0);
return vif0.vifpacketsize;
@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
else
{
/* we got all the data, transfer it fully */
int ret;
int ret = vif0.tag.size;
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
ret = vif0.tag.size;
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
//Align data after a split transfer first
if(vif0Regs->offset != 0 || vif0.cl != 0)
{
vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
data += ret - vif0.tag.size;
if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
}
}
@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data)
vif1.cl = 0;
vif1.tag.addr <<= 4;
vif1.tag.cmd = vif1.cmd;
vifRegs = (VIFregisters*)vif1Regs;
vifMaskRegs = g_vif1Masks;
vif = &vif1;
vifRow = g_vifRow1;
}
static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size)
static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
{
/* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size);
/* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
{
FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f);
@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
{
if (vif1.vifpacketsize < vif1.tag.size)
{
_vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
vif1.tag.addr += vif1.vifpacketsize << 2;
vif1.tag.size -= vif1.vifpacketsize;
return vif1.vifpacketsize;
@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
else
{
int ret;
_vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
ret = vif1.tag.size;
vif1.tag.size = 0;
vif1.cmd = 0;
@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data)
/* size is less that the total size, transfer is
'in pieces' */
VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);
ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
vif1.tag.size -= vif1.vifpacketsize;
FreezeXMMRegs(0);
return vif1.vifpacketsize;
}
else
{
int ret;
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
ret = vif1.tag.size;
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
int ret = vif1.tag.size;
if(vif1Regs->offset != 0 || vif1.cl != 0)
{
vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
data += ret - vif1.tag.size;
if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
}
}

View File

@ -18,9 +18,9 @@
*/
.intel_syntax noprefix
.extern _vifRegs
.extern _vifMaskRegs
.extern _vifRow
.extern vifRegs
.extern vifMaskRegs
.extern vifRow
#define VIF_ESP esp
#define VIF_SRC esi
@ -108,7 +108,7 @@
// setting up masks
#define UNPACK_Setup_Mask_SSE(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
@ -118,7 +118,7 @@
#define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
#define UNPACK_Start_Setup_Mask_SSE_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
pand XMM_ROWMASK, XMM_ROW; \
@ -129,12 +129,12 @@
#define UNPACK_Setup_Mask_SSE_0_1(CL)
#define UNPACK_Setup_Mask_SSE_1_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
// ignore CL, since vif.cycle.wl == 1
#define UNPACK_Setup_Mask_SSE_2_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
@ -1312,9 +1312,9 @@
#pragma warning(disable:4731)
#define SAVE_ROW_REG_BASE \
mov VIF_TMPADDR, _vifRow; \
mov VIF_TMPADDR, vifRow; \
movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
mov VIF_TMPADDR, _vifRegs; \
mov VIF_TMPADDR, vifRegs; \
movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
psrldq XMM_ROW, 4; \
movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \
@ -1349,7 +1349,7 @@
.globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
INIT_ARGS(); \
mov VIF_TMPADDR, _vifRegs; \
mov VIF_TMPADDR, vifRegs; \
movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
sub VIF_INC, VIF_SAVEEBX; \

View File

@ -5,9 +5,9 @@
.xmm
extern _vifRegs:ptr
extern _vifMaskRegs:ptr
extern _vifRow:ptr
extern vifRegs:ptr
extern vifMaskRegs:ptr
extern vifRow:ptr
extern s_TempDecompress:ptr
@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0
UNPACK_Setup_Mask_SSE macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32]
movdqa xmm3, [eax + 64*(CL)]
@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
endm
UNPACK_Start_Setup_Mask_SSE_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32]
pand xmm4, xmm6
@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
UNPACK_Setup_Mask_SSE_0_1 macro CL
endm
UNPACK_Setup_Mask_SSE_1_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm3, [eax + 64*(0)]
endm
UNPACK_Setup_Mask_SSE_2_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(0) + 16]
movdqa xmm5, [eax + 64*(0) + 32]
movdqa xmm3, [eax + 64*(0)]
@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType
SAVE_ROW_REG_BASE macro
mov eax, [_vifRow]
mov eax, [vifRow]
movdqa [eax], xmm6
mov eax, [_vifRegs]
mov eax, [vifRegs]
movss dword ptr [eax+0100h], xmm6
psrldq xmm6, 4
movss dword ptr [eax+0110h], xmm6
@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
push ebx
INIT_ARGS
mov eax, [_vifRegs]
mov eax, [vifRegs]
movzx ecx, byte ptr [eax + 040h]
movzx ebx, byte ptr [eax + 041h]
sub ecx, ebx