mirror of https://github.com/PCSX2/pcsx2.git
Optimized and split up the unpack call a bit so fewer checks are run, which should bring an overall speed increase. Also got rid of some duplicate pointer rubbish that was all over the place.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent a388d2969c
commit 4df3f80d30
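For orientation before the diff itself: the "duplicate pointer rubbish" is the second set of unpack-state globals the code used to keep (a `vifRegs` for the C++ side plus an underscore-prefixed `_vifRegs` alias for the SSE assembly, and likewise for `vif`, `vifRow` and `vifMaskRegs`), which had to be re-synced on every unpack call; the commit collapses each pair into a single extern "C" symbol and also splits the old VIFunpack entry into a separate VIFalign pass plus the regular unpack. A rough sketch of the pointer cleanup, illustrative only and not the exact PCSX2 declarations:

    // Illustrative only -- not the real PCSX2 declarations.
    struct VIFregisters;            // the VIF register file (defined in Vif.h)

    // Before this commit the unpackers kept two names for the same state and
    // re-synced them on every call:
    //   VIFregisters* vifRegs;                      // C++ side
    //   extern "C" VIFregisters* _vifRegs;          // asm side
    //   _vifRegs = vifRegs;                         // done inside VIFunpack each time
    //
    // Afterwards a single extern "C" set is shared by both sides and is written
    // once, when the UNPACK command is decoded (vif0UNPACK / vif1UNPACK):
    extern "C" VIFregisters* vifRegs;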
@@ -25,10 +25,10 @@
 #include "Vif.h"
 #include "VifDma.h"

-VIFregisters *_vifRegs;
-u32* _vifRow = NULL, *_vifCol = NULL;
-u32* _vifMaskRegs = NULL;
-vifStruct *_vif;
+VIFregisters *vifRegs;
+u32* vifRow = NULL, *vifCol = NULL;
+u32* vifMaskRegs = NULL;
+vifStruct *vif;

 PCSX2_ALIGNED16(u32 g_vifRow0[4]);
 PCSX2_ALIGNED16(u32 g_vifCol0[4]);

@@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 int n;
 u32 vifRowReg = getVifRowRegs(offnum);

-if (_vifRegs->code & 0x10000000)
+if (vifRegs->code & 0x10000000)
 {
-switch (_vif->cl)
+switch (vif->cl)
 {
 case 0:
 if (offnum == OFFSET_X)
-n = (_vifRegs->mask) & 0x3;
+n = (vifRegs->mask) & 0x3;
 else
-n = (_vifRegs->mask >> (offnum * 2)) & 0x3;
+n = (vifRegs->mask >> (offnum * 2)) & 0x3;
 break;
 case 1:
-n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
+n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
 break;
 case 2:
-n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
+n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
 break;
 default:
-n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
+n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
 break;
 }
 }
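The hunk above only renames `_vifRegs`/`_vif` to `vifRegs`/`vif`, but the selection it touches is worth spelling out: the MASK register holds sixteen 2-bit fields, one per vector component (X/Y/Z/W) for each of the first write-cycle rows, and `writeXYZW` picks the field for the current column (`vif->cl`) and component (`offnum`). A self-contained sketch of that selection, with a hypothetical helper name that is not part of the diff:

    #include <cstdint>

    // 0 = write source data, 1 = write ROW register,
    // 2 = write COL register, 3 = leave the destination untouched (masked).
    static uint32_t maskField(uint32_t mask, uint32_t cl, uint32_t offnum)
    {
        // Rows with cl >= 3 all use the top byte, matching the 'default' case above.
        uint32_t row = (cl > 3) ? 3 : cl;
        return (mask >> (row * 8 + offnum * 2)) & 0x3;
    }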
@@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 switch (n)
 {
 case 0:
-if ((_vif->cmd & 0x6F) == 0x6f)
+if ((vif->cmd & 0x6F) == 0x6f)
 {
 dest = data;
 }
-else switch (_vifRegs->mode)
+else switch (vifRegs->mode)
 {
 case 1:
 dest = data + vifRowReg;

@@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 dest = vifRowReg;
 break;
 case 2:
-dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl);
+dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
 break;
 case 3:
 //Masked so don't do anything
 break;
 }
-// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data);
+// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
 }

 template <class T>
@@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size)
 template <class T>
 void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
 {
-if (_vifRegs->offset == OFFSET_X)
+if (vifRegs->offset == OFFSET_X)
 {
 if (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data++);
-_vifRegs->offset = OFFSET_Y;
+writeXYZW(vifRegs->offset, *dest++, *data++);
+vifRegs->offset = OFFSET_Y;
 size--;
 }
 }

-if (_vifRegs->offset == OFFSET_Y)
+if (vifRegs->offset == OFFSET_Y)
 {
 if (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data);
-_vifRegs->offset = OFFSET_Z;
+writeXYZW(vifRegs->offset, *dest++, *data);
+vifRegs->offset = OFFSET_Z;
 size--;
 }
 }

-if (_vifRegs->offset == OFFSET_Z)
+if (vifRegs->offset == OFFSET_Z)
 {
-writeXYZW(_vifRegs->offset, *dest++, *dest-2);
-_vifRegs->offset = OFFSET_W;
+writeXYZW(vifRegs->offset, *dest++, *dest-2);
+vifRegs->offset = OFFSET_W;
 }

-if (_vifRegs->offset == OFFSET_W)
+if (vifRegs->offset == OFFSET_W)
 {
-writeXYZW(_vifRegs->offset, *dest, *data);
-_vifRegs->offset = OFFSET_X;
+writeXYZW(vifRegs->offset, *dest, *data);
+vifRegs->offset = OFFSET_X;
 }
 }

 template <class T>
 void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
 {
-if(_vifRegs->offset == OFFSET_X)
+if(vifRegs->offset == OFFSET_X)
 {
 if (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data++);
-_vifRegs->offset = OFFSET_Y;
+writeXYZW(vifRegs->offset, *dest++, *data++);
+vifRegs->offset = OFFSET_Y;
 size--;
 }
 }

-if(_vifRegs->offset == OFFSET_Y)
+if(vifRegs->offset == OFFSET_Y)
 {
 if (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data++);
-_vifRegs->offset = OFFSET_Z;
+writeXYZW(vifRegs->offset, *dest++, *data++);
+vifRegs->offset = OFFSET_Z;
 size--;
 }
 }

-if(_vifRegs->offset == OFFSET_Z)
+if(vifRegs->offset == OFFSET_Z)
 {
 if (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data++);
-_vifRegs->offset = OFFSET_W;
+writeXYZW(vifRegs->offset, *dest++, *data++);
+vifRegs->offset = OFFSET_W;
 size--;
 }
 }

-if(_vifRegs->offset == OFFSET_W)
+if(vifRegs->offset == OFFSET_W)
 {
 //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
 //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
-writeXYZW(_vifRegs->offset, *dest, *data);
-_vifRegs->offset = OFFSET_X;
+writeXYZW(vifRegs->offset, *dest, *data);
+vifRegs->offset = OFFSET_X;
 }
 }

@@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
 {
 while (size > 0)
 {
-writeXYZW(_vifRegs->offset, *dest++, *data++);
-_vifRegs->offset++;
+writeXYZW(vifRegs->offset, *dest++, *data++);
+vifRegs->offset++;
 size--;
 }

-if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X;
+if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
 }

 void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
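The V2/V3/V4 unpackers touched above carry `vifRegs->offset` across calls so a vector that is cut in half by a DMA packet boundary resumes at the component where it stopped. A compact sketch of that resume pattern, with hypothetical names and no mask or mode handling, simplified from the template code in the diff:

    #include <cstdint>
    using u32 = std::uint32_t;

    // Stand-in for writeXYZW and its masking/mode work (illustration only).
    static void writeComponent(u32 comp, u32& dest, u32 data) { (void)comp; dest = data; }

    template <typename T>
    void unpackResume(u32* dest, const T* data, int size, u32& offset /* 0..3 = X..W */)
    {
        while (size > 0 && offset < 4)
        {
            writeComponent(offset, dest[offset], static_cast<u32>(*data++));
            ++offset;
            --size;
        }
        if (offset >= 4) offset = 0;   // wrap back to X for the next vector
    }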
@@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
 return ret;
 }

-static __forceinline int mfifoVIF1chain()
+static __forceinline int mfifo_VIF1chain()
 {
 int ret;

@@ -531,7 +530,7 @@ void vifMFIFOInterrupt()
 {
 g_vifCycles = 0;

-if (vif1.inprogress == 1) mfifoVIF1chain();
+if (vif1.inprogress == 1) mfifo_VIF1chain();

 if (vif1.irq && vif1.tag.size == 0)
 {
pcsx2/Vif.h (38 changes)

@@ -81,9 +81,9 @@ struct VIFregisters {
 extern "C"
 {
 // these use cdecl for Asm code references.
-extern VIFregisters *_vifRegs;
-extern u32* _vifMaskRegs;
-extern u32* _vifRow;
+extern VIFregisters *vifRegs;
+extern u32* vifMaskRegs;
+extern u32* vifRow;
 extern u32* _vifCol;
 }

@@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
 switch (reg)
 {
 case 0:
-_vifRegs->r0 = data;
+vifRegs->r0 = data;
 break;
 case 1:
-_vifRegs->r1 = data;
+vifRegs->r1 = data;
 break;
 case 2:
-_vifRegs->r2 = data;
+vifRegs->r2 = data;
 break;
 case 3:
-_vifRegs->r3 = data;
+vifRegs->r3 = data;
 break;
 jNO_DEFAULT;
 }

@@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg)
 switch (reg)
 {
 case 0:
-return _vifRegs->r0;
+return vifRegs->r0;
 break;
 case 1:
-return _vifRegs->r1;
+return vifRegs->r1;
 break;
 case 2:
-return _vifRegs->r2;
+return vifRegs->r2;
 break;
 case 3:
-return _vifRegs->r3;
+return vifRegs->r3;
 break;
 jNO_DEFAULT;
 }

@@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data)
 switch (reg)
 {
 case 0:
-_vifRegs->c0 = data;
+vifRegs->c0 = data;
 break;
 case 1:
-_vifRegs->c1 = data;
+vifRegs->c1 = data;
 break;
 case 2:
-_vifRegs->c2 = data;
+vifRegs->c2 = data;
 break;
 case 3:
-_vifRegs->c3 = data;
+vifRegs->c3 = data;
 break;
 jNO_DEFAULT;
 }

@@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg)
 switch (reg)
 {
 case 0:
-return _vifRegs->c0;
+return vifRegs->c0;
 break;
 case 1:
-return _vifRegs->c1;
+return vifRegs->c1;
 break;
 case 2:
-return _vifRegs->c2;
+return vifRegs->c2;
 break;
 case 3:
-return _vifRegs->c3;
+return vifRegs->c3;
 break;
 jNO_DEFAULT;
 }
pcsx2/VifDma.cpp (463 changes)

@@ -37,10 +37,10 @@ using namespace std; // for min / max
 extern "C"
 {
 // Need cdecl on these for ASM references.
-extern VIFregisters *_vifRegs;
-extern u32* _vifMaskRegs;
-extern u32* _vifRow;
-extern u32* _vifCol;
+extern VIFregisters *vifRegs;
+extern u32* vifMaskRegs;
+extern u32* vifRow;
+extern u32* vifCol;
 }

 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);

@@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);

-extern vifStruct *_vif;
+extern vifStruct *vif;

 vifStruct vif0, vif1;

@@ -254,21 +254,9 @@ __forceinline static int _limit(int a, int max)
 static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
 {
 const VIFUnpackFuncTable *unpack;
-vifStruct *vif;
-VIFregisters *vifRegs;

 unpack = &VIFfuncTable[ unpackType ];

-if (VIFdmanum == 0)
-{
-vif = &vif0;
-vifRegs = vif0Regs;
-}
-else
-{
-vif = &vif1;
-vifRegs = vif1Regs;
-}

 switch (unpackType)
 {
 case 0x0:
@@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
 }

 //This is sorted out later
-vif->tag.addr &= ~0xf;
+if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
+{
+VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
+vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
+}

 }

-static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 {
 u32 *dest;
 u32 unpackType;
 UNPACKFUNCTYPE func;
 const VIFUnpackFuncTable *ft;
-vifStruct *vif;
-VIFregisters *vifRegs;
 VURegs * VU;
 u8 *cdata = (u8*)data;


 #ifdef _DEBUG
 u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
 #endif

 _mm_prefetch((char*)data, _MM_HINT_NTA);

 if (VIFdmanum == 0)
 {
 VU = &VU0;
 vif = &vif0;
 vifRegs = vif0Regs;
 assert(v->addr < memsize);
 }
 else
 {

 VU = &VU1;
 vif = &vif1;
 vifRegs = vif1Regs;
 assert(v->addr < memsize);

 if (vu1MicroIsSkipping())
 {
 // don't process since the frame is dummy
 vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
 return;
 }
 }

 dest = (u32*)(VU->Mem + v->addr);

-VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
 VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);

 VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
 #ifdef _DEBUG
 if (v->size != size)
 {
 VIF_LOG("*PCSX2*: warning v->size != size");
 }

 if ((v->addr + size*4) > memsize)
 {
 Console::Notice("*PCSX2*: fixme unpack overflow");
 Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
 params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
 }
 #endif

 // The unpack type
 unpackType = v->cmd & 0xf;

 if (size == 0)
 {
 VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
 }

 _mm_prefetch((char*)data + 128, _MM_HINT_NTA);

-_vifRegs = (VIFregisters*)vifRegs;
-_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
-_vif = vif;
-_vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
 ft = &VIFfuncTable[ unpackType ];
-func = _vif->usn ? ft->funcU : ft->funcS;
+func = vif->usn ? ft->funcU : ft->funcS;

 size <<= 2;

@@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 memsize = size;
 #endif

-if (v->size != (size >> 2))
-ProcessMemSkip(size, unpackType, VIFdmanum);

-if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000))
-{
-//Sanity Check (memory overflow)
-DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
-
-}

-if (_vifRegs->offset > 0)
-{
-int destinc, unpacksize;
+if(vif1Regs->offset != 0)
+{
+int unpacksize;

 //This is just to make sure the alignment isnt loopy on a split packet
-if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
+if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
 {
 DevCon::Error("Warning: Unpack alignment error");
 }
@@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)

 if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
 VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
 // SSE doesn't handle such small data

-if (vifRegs->offset < (u32)ft->qsize)
+if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
 {
-if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
-{
-Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
-}
+DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
 }
 unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
-}
-else
-{
-unpacksize = 0;
-Console::WriteLn("Unpack align offset = 0");
-}
-VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
-destinc = (4 - ft->qsize) + unpacksize;


+VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);

 func(dest, (u32*)cdata, unpacksize);
 size -= unpacksize * ft->dsize;
 cdata += unpacksize * ft->dsize;


 vifRegs->num--;
 ++vif->cl;

 if (vif->cl == vifRegs->cycle.wl)
 {
 if (vifRegs->cycle.cl != vifRegs->cycle.wl)
-dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+{
+vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
+//dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+}
 else
-dest += destinc;
+{
+vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+//dest += destinc;
+}
 vif->cl = 0;
+VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
+return size >> 2;

 }
 else
 {
-dest += destinc;
+vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+dest += (4 - ft->qsize) + unpacksize;
 cdata += unpacksize * ft->dsize;
+VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
 }

-VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);

 }


-if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
+if (vif->cl != 0) //Check alignment for SSE unpacks
 {

 #ifdef _DEBUG
@@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)

 int incdest;

-if (vif->cl != 0)
+if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
 {
 // continuation from last stream

@@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 if (vif->cl == vifRegs->cycle.wl)
 {
 dest += incdest;
+vif->tag.addr += incdest * 4;
 vif->cl = 0;
 break;
 }

 dest += 4;
+vif->tag.addr += 16;
 }

-// have to update
-_vifRow[0] = _vifRegs->r0;
-_vifRow[1] = _vifRegs->r1;
-_vifRow[2] = _vifRegs->r2;
-_vifRow[3] = _vifRegs->r3;
+if(vifRegs->mode == 2)
+{
+//Update the reg rows for SSE
+vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
+vifRow[0] = vifRegs->r0;
+vifRow[1] = vifRegs->r1;
+vifRow[2] = vifRegs->r2;
+vifRow[3] = vifRegs->r3;
+}

 }
 }
+return size>>2;
 }

-if ((size >= ft->gsize) && !(v->addr&0xf))

+static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+{
+u32 *dest;
+u32 unpackType;
+UNPACKFUNCTYPE func;
+const VIFUnpackFuncTable *ft;
+VURegs * VU;
+u8 *cdata = (u8*)data;
+
+#ifdef _DEBUG
+u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
+#endif
+
+_mm_prefetch((char*)data, _MM_HINT_NTA);
+
+if (VIFdmanum == 0)
+{
+VU = &VU0;
+//vifRegs = vif0Regs;
+assert(v->addr < memsize);
+}
+else
+{
+
+VU = &VU1;
+//vifRegs = vif1Regs;
+assert(v->addr < memsize);
+
+if (vu1MicroIsSkipping())
+{
+// don't process since the frame is dummy
+vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
+return;
+}
+}
+
+dest = (u32*)(VU->Mem + v->addr);
+
+VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
+
+VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
+
+// The unpack type
+unpackType = v->cmd & 0xf;
+
+_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
+
+ft = &VIFfuncTable[ unpackType ];
+func = vif->usn ? ft->funcU : ft->funcS;
+
+size <<= 2;
+
+#ifdef _DEBUG
+memsize = size;
+#endif
+
+
+#ifdef VIFUNPACKDEBUG
+
+if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
+((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
+{
+//Sanity Check (memory overflow)
+DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
+
+}
+#endif
+
+if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
+{
+
+#ifdef _DEBUG
+static int s_count = 0;
+#endif
+
+
+if (size >= ft->gsize)
 {
 const UNPACKPARTFUNCTYPESSE* pfn;
 int writemask;
@@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)

 if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;

+if(vifRegs->mode == 2)
+{
+//Update the reg rows for non SSE
+vifRegs->r0 = vifRow[0];
+vifRegs->r1 = vifRow[1];
+vifRegs->r2 = vifRow[2];
+vifRegs->r3 = vifRow[3];
+}


 // if size is left over, update the src,dst pointers
 if (writemask > 0)
 {
@@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 cdata += left * ft->gsize;
 dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
 vifRegs->num -= left;
-_vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+size = writemask;

+if (size >= ft->dsize && vifRegs->num > 0)
+{
+//VIF_LOG("warning, end with size = %d", size);
+
+/* unpack one qword */
+vif->tag.addr += (size / ft->dsize) * 4;
+func(dest, (u32*)cdata, size / ft->dsize);
+size = 0;
+
+if(vifRegs->mode == 2)
+{
+//Update the reg rows for SSE
+vifRow[0] = vifRegs->r0;
+vifRow[1] = vifRegs->r1;
+vifRow[2] = vifRegs->r2;
+vifRow[3] = vifRegs->r3;
+}
+VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
+}
 }
 else
 {
 vifRegs->num -= size / ft->gsize;
-if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
 size = 0;
 }

-size = writemask;

-_vifRegs->r0 = _vifRow[0];
-_vifRegs->r1 = _vifRow[1];
-_vifRegs->r2 = _vifRow[2];
-_vifRegs->r3 = _vifRow[3];
 }
-else
-{

-if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available
-{
-// v4-32
-if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
-{
-vifRegs->num -= size >> 4;
-memcpy_fast((u8*)dest, cdata, size);
-size = 0;
-return;
-}
-}
-
-incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
-
-while ((size >= ft->gsize) && (vifRegs->num > 0))
-{
-func(dest, (u32*)cdata, ft->qsize);
-cdata += ft->gsize;
-size -= ft->gsize;
-
-vifRegs->num--;
-//if(vifRegs->num == loophere) dest = (u32*)(VU->Mem);
-++vif->cl;
-if (vif->cl == vifRegs->cycle.wl)
-{
-dest += incdest;
-vif->cl = 0;
-}
-else
-{
-dest += 4;
-}
-
-}
-
-// have to update
-_vifRow[0] = _vifRegs->r0;
-_vifRow[1] = _vifRegs->r1;
-_vifRow[2] = _vifRegs->r2;
-_vifRow[3] = _vifRegs->r3;
-}
-
-// used for debugging vif
-// {
-// int i, j, k;
-// u32* curdest = olddest;
-// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
-// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
-// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
-// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
-// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
-//
-// for(i = 0; i < memsize; ) {
-// for(k = 0; k < vifRegs->cycle.wl; ++k) {
-// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
-// fprintf(ftemp, "%x ", curdest[4*k+j]);
-// }
-// }
-//
-// fprintf(ftemp, "\n");
-// curdest += 4*vifRegs->cycle.cl;
-// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
-// }
-// fclose(ftemp);
-// }
-// s_count++;

-if (size >= ft->dsize && vifRegs->num > 0)
+}
+else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
 {
 //VIF_LOG("warning, end with size = %d", size);

@@ -679,14 +665,20 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 vif->tag.addr += (size / ft->dsize) * 4;
 func(dest, (u32*)cdata, size / ft->dsize);
 size = 0;


+if(vifRegs->mode == 2)
+{
+//Update the reg rows for SSE
+vifRow[0] = vifRegs->r0;
+vifRow[1] = vifRegs->r1;
+vifRow[2] = vifRegs->r2;
+vifRow[3] = vifRegs->r3;
+}
 VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
 }

 }
 else /* filling write */
 {
 VIF_LOG("VIFunpack - filling write");

 if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
 DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
@@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data)
 vif0.tag.addr &= 0xfff;
 vif0.tag.size = len;
 vif0Regs->offset = 0;

+vifRegs = (VIFregisters*)vif0Regs;
+vifMaskRegs = g_vif0Masks;
+vif = &vif0;
+vifRow = g_vifRow0;
 }

-static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
 {
-/* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size);
+/* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
 {
 FILE *f = fopen("vu1.raw", "wb");
 fwrite(data, 1, size*4, f);
@@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
 {
 if (vif0.vifpacketsize < vif0.tag.size)
 {
-_vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
+vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
 vif0.tag.addr += vif0.vifpacketsize << 2;
 vif0.tag.size -= vif0.vifpacketsize;
 return vif0.vifpacketsize;

@@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
 {
 int ret;

-_vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
+vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
 ret = vif0.tag.size;
 vif0.tag.size = 0;
 vif0.cmd = 0;
@@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
 {
 /* size is less that the total size, transfer is 'in pieces' */
 VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);

+ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);

 vif0.tag.size -= vif0.vifpacketsize;
 FreezeXMMRegs(0);
 return vif0.vifpacketsize;

@@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
 else
 {
 /* we got all the data, transfer it fully */
-int ret;
+int ret = vif0.tag.size;

-VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
-ret = vif0.tag.size;
-vif0.tag.size = 0;
-vif0.cmd = 0;
-FreezeXMMRegs(0);
-return ret;
+//Align data after a split transfer first
+if(vif0Regs->offset != 0 || vif0.cl != 0)
+{
+vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+data += ret - vif0.tag.size;
+if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+vif0.tag.size = 0;
+vif0.cmd = 0;
+FreezeXMMRegs(0);
+return ret;
+}
+else
+{
+VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+vif0.tag.size = 0;
+vif0.cmd = 0;
+FreezeXMMRegs(0);
+return ret;
+}
 }
 }

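The reworked Vif0TransUnpack path above (mirrored for VIF1 further down) first realigns a packet that a previous partial transfer left mid-vector, then hands the remainder to the regular unpack. A simplified sketch of that control flow in isolation, with hypothetical helper and type names rather than the real PCSX2 ones:

    #include <cstdint>
    using u32 = std::uint32_t;

    struct VifTag { int size; };                         // illustrative stand-in

    // Stubs standing in for VIFalign / VIFunpack in this sketch.
    static int  alignChunk(u32*, VifTag& t, int size) { return size; }
    static void unpackChunk(u32*, VifTag&, int)        {}

    int transferUnpack(u32* data, VifTag& tag, bool misaligned /* offset != 0 || cl != 0 */)
    {
        int ret = tag.size;                              // whole packet is available here
        if (misaligned)
        {
            tag.size = alignChunk(data, tag, tag.size);  // consume the partial vector first
            data += ret - tag.size;                      // advance past what was eaten
            if (tag.size > 0) unpackChunk(data, tag, tag.size);
        }
        else
        {
            unpackChunk(data, tag, tag.size);            // fast path, already aligned
        }
        tag.size = 0;
        return ret;                                      // words consumed from the FIFO
    }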
@@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data)
 vif1.cl = 0;
 vif1.tag.addr <<= 4;
 vif1.tag.cmd = vif1.cmd;

+vifRegs = (VIFregisters*)vif1Regs;
+vifMaskRegs = g_vif1Masks;
+vif = &vif1;
+vifRow = g_vifRow1;
 }

-static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
 {
-/* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size);
+/* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
 {
 FILE *f = fopen("vu1.raw", "wb");
 fwrite(data, 1, size*4, f);
@@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
 {
 if (vif1.vifpacketsize < vif1.tag.size)
 {
-_vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
+vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
 vif1.tag.addr += vif1.vifpacketsize << 2;
 vif1.tag.size -= vif1.vifpacketsize;
 return vif1.vifpacketsize;

@@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
 else
 {
 int ret;
-_vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
+vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
 ret = vif1.tag.size;
 vif1.tag.size = 0;
 vif1.cmd = 0;
@@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data)
 /* size is less that the total size, transfer is
 'in pieces' */
 VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);

+ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
 vif1.tag.size -= vif1.vifpacketsize;
 FreezeXMMRegs(0);
 return vif1.vifpacketsize;
 }
 else
 {
-int ret;
-/* we got all the data, transfer it fully */
-VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
-ret = vif1.tag.size;
-vif1.tag.size = 0;
-vif1.cmd = 0;
-FreezeXMMRegs(0);
-return ret;
+int ret = vif1.tag.size;

+if(vif1Regs->offset != 0 || vif1.cl != 0)
+{
+vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+data += ret - vif1.tag.size;
+if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+vif1.tag.size = 0;
+vif1.cmd = 0;
+FreezeXMMRegs(0);
+return ret;
+}
+else
+{
+/* we got all the data, transfer it fully */
+VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+vif1.tag.size = 0;
+vif1.cmd = 0;
+FreezeXMMRegs(0);
+return ret;
+}
 }

 }
@@ -18,9 +18,9 @@
 */
 .intel_syntax noprefix

-.extern _vifRegs
-.extern _vifMaskRegs
-.extern _vifRow
+.extern vifRegs
+.extern vifMaskRegs
+.extern vifRow

 #define VIF_ESP esp
 #define VIF_SRC esi

@@ -108,7 +108,7 @@

 // setting up masks
 #define UNPACK_Setup_Mask_SSE(CL) \
-mov VIF_TMPADDR, _vifMaskRegs; \
+mov VIF_TMPADDR, vifMaskRegs; \
 movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
 movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
 movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \

@@ -118,7 +118,7 @@

 #define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
 #define UNPACK_Start_Setup_Mask_SSE_1(CL) \
-mov VIF_TMPADDR, _vifMaskRegs; \
+mov VIF_TMPADDR, vifMaskRegs; \
 movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
 movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
 pand XMM_ROWMASK, XMM_ROW; \

@@ -129,12 +129,12 @@

 #define UNPACK_Setup_Mask_SSE_0_1(CL)
 #define UNPACK_Setup_Mask_SSE_1_1(CL) \
-mov VIF_TMPADDR, _vifMaskRegs; \
+mov VIF_TMPADDR, vifMaskRegs; \
 movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \

 // ignore CL, since vif.cycle.wl == 1
 #define UNPACK_Setup_Mask_SSE_2_1(CL) \
-mov VIF_TMPADDR, _vifMaskRegs; \
+mov VIF_TMPADDR, vifMaskRegs; \
 movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
 movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
 movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
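The `64*(CL)` addressing in these macros implies one 64-byte mask entry per write-cycle row, with the write mask at +0, the row mask at +16 and the row/col mask at +32, each a 16-byte vector loaded with movdqa. A struct view of that layout as it can be read off the asm (field names are descriptive guesses, not the PCSX2 declarations):

    #include <cstdint>

    // Layout implied by the addressing above: vifMaskRegs points at an array of
    // 64-byte entries, one per CL row, holding the precomputed 128-bit masks.
    struct alignas(16) VifMaskEntry
    {
        std::uint32_t write[4];   // +0  : loaded into XMM_WRITEMASK
        std::uint32_t row[4];     // +16 : loaded into XMM_ROWMASK
        std::uint32_t rowcol[4];  // +32 : loaded into XMM_ROWCOLMASK
        std::uint32_t pad[4];     // +48 : keeps consecutive entries 64 bytes apart
    };

    static_assert(sizeof(VifMaskEntry) == 64, "matches the 64*(CL) stride in the asm");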
@@ -1312,9 +1312,9 @@
 #pragma warning(disable:4731)

 #define SAVE_ROW_REG_BASE \
-mov VIF_TMPADDR, _vifRow; \
+mov VIF_TMPADDR, vifRow; \
 movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
-mov VIF_TMPADDR, _vifRegs; \
+mov VIF_TMPADDR, vifRegs; \
 movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
 psrldq XMM_ROW, 4; \
 movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \

@@ -1349,7 +1349,7 @@
 .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
 UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
 INIT_ARGS(); \
-mov VIF_TMPADDR, _vifRegs; \
+mov VIF_TMPADDR, vifRegs; \
 movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
 movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
 sub VIF_INC, VIF_SAVEEBX; \
@@ -5,9 +5,9 @@
 .xmm


-extern _vifRegs:ptr
-extern _vifMaskRegs:ptr
-extern _vifRow:ptr
+extern vifRegs:ptr
+extern vifMaskRegs:ptr
+extern vifRow:ptr
 extern s_TempDecompress:ptr


@@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0


 UNPACK_Setup_Mask_SSE macro CL
-mov eax, [_vifMaskRegs]
+mov eax, [vifMaskRegs]
 movdqa xmm4, [eax + 64*(CL) + 16]
 movdqa xmm5, [eax + 64*(CL) + 32]
 movdqa xmm3, [eax + 64*(CL)]

@@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
 endm

 UNPACK_Start_Setup_Mask_SSE_1 macro CL
-mov eax, [_vifMaskRegs]
+mov eax, [vifMaskRegs]
 movdqa xmm4, [eax + 64*(CL) + 16]
 movdqa xmm5, [eax + 64*(CL) + 32]
 pand xmm4, xmm6

@@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
 UNPACK_Setup_Mask_SSE_0_1 macro CL
 endm
 UNPACK_Setup_Mask_SSE_1_1 macro CL
-mov eax, [_vifMaskRegs]
+mov eax, [vifMaskRegs]
 movdqa xmm3, [eax + 64*(0)]
 endm


 UNPACK_Setup_Mask_SSE_2_1 macro CL

-mov eax, [_vifMaskRegs]
+mov eax, [vifMaskRegs]
 movdqa xmm4, [eax + 64*(0) + 16]
 movdqa xmm5, [eax + 64*(0) + 32]
 movdqa xmm3, [eax + 64*(0)]

@@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType


 SAVE_ROW_REG_BASE macro
-mov eax, [_vifRow]
+mov eax, [vifRow]
 movdqa [eax], xmm6
-mov eax, [_vifRegs]
+mov eax, [vifRegs]
 movss dword ptr [eax+0100h], xmm6
 psrldq xmm6, 4
 movss dword ptr [eax+0110h], xmm6

@@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
 push ebx

 INIT_ARGS
-mov eax, [_vifRegs]
+mov eax, [vifRegs]
 movzx ecx, byte ptr [eax + 040h]
 movzx ebx, byte ptr [eax + 041h]
 sub ecx, ebx