Optimized and split up the unpack call a bit so fewer checks are run, which should bring an overall speed increase. Also got rid of some duplicate pointer rubbish which was all over the place.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
refraction 2009-04-11 17:45:14 +00:00
parent a388d2969c
commit 4df3f80d30
5 changed files with 331 additions and 299 deletions

View File

@ -25,10 +25,10 @@
#include "Vif.h" #include "Vif.h"
#include "VifDma.h" #include "VifDma.h"
VIFregisters *_vifRegs; VIFregisters *vifRegs;
u32* _vifRow = NULL, *_vifCol = NULL; u32* vifRow = NULL, *vifCol = NULL;
u32* _vifMaskRegs = NULL; u32* vifMaskRegs = NULL;
vifStruct *_vif; vifStruct *vif;
PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifRow0[4]);
PCSX2_ALIGNED16(u32 g_vifCol0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]);
@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
int n; int n;
u32 vifRowReg = getVifRowRegs(offnum); u32 vifRowReg = getVifRowRegs(offnum);
if (_vifRegs->code & 0x10000000) if (vifRegs->code & 0x10000000)
{ {
switch (_vif->cl) switch (vif->cl)
{ {
case 0: case 0:
if (offnum == OFFSET_X) if (offnum == OFFSET_X)
n = (_vifRegs->mask) & 0x3; n = (vifRegs->mask) & 0x3;
else else
n = (_vifRegs->mask >> (offnum * 2)) & 0x3; n = (vifRegs->mask >> (offnum * 2)) & 0x3;
break; break;
case 1: case 1:
n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
break; break;
case 2: case 2:
n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3; n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
break; break;
default: default:
n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3; n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
break; break;
} }
} }
@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
switch (n) switch (n)
{ {
case 0: case 0:
if ((_vif->cmd & 0x6F) == 0x6f) if ((vif->cmd & 0x6F) == 0x6f)
{ {
dest = data; dest = data;
} }
else switch (_vifRegs->mode) else switch (vifRegs->mode)
{ {
case 1: case 1:
dest = data + vifRowReg; dest = data + vifRowReg;
@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
dest = vifRowReg; dest = vifRowReg;
break; break;
case 2: case 2:
dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl); dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
break; break;
case 3: case 3:
//Masked so don't do anything
break; break;
} }
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
} }
template <class T> template <class T>
@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size)
template <class T> template <class T>
void __fastcall UNPACK_V2(u32 *dest, T *data, int size) void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
{ {
if (_vifRegs->offset == OFFSET_X) if (vifRegs->offset == OFFSET_X)
{ {
if (size > 0) if (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data++); writeXYZW(vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Y; vifRegs->offset = OFFSET_Y;
size--; size--;
} }
} }
if (_vifRegs->offset == OFFSET_Y) if (vifRegs->offset == OFFSET_Y)
{ {
if (size > 0) if (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data); writeXYZW(vifRegs->offset, *dest++, *data);
_vifRegs->offset = OFFSET_Z; vifRegs->offset = OFFSET_Z;
size--; size--;
} }
} }
if (_vifRegs->offset == OFFSET_Z) if (vifRegs->offset == OFFSET_Z)
{ {
writeXYZW(_vifRegs->offset, *dest++, *dest-2); writeXYZW(vifRegs->offset, *dest++, *dest-2);
_vifRegs->offset = OFFSET_W; vifRegs->offset = OFFSET_W;
} }
if (_vifRegs->offset == OFFSET_W) if (vifRegs->offset == OFFSET_W)
{ {
writeXYZW(_vifRegs->offset, *dest, *data); writeXYZW(vifRegs->offset, *dest, *data);
_vifRegs->offset = OFFSET_X; vifRegs->offset = OFFSET_X;
} }
} }
template <class T> template <class T>
void __fastcall UNPACK_V3(u32 *dest, T *data, int size) void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
{ {
if(_vifRegs->offset == OFFSET_X) if(vifRegs->offset == OFFSET_X)
{ {
if (size > 0) if (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data++); writeXYZW(vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Y; vifRegs->offset = OFFSET_Y;
size--; size--;
} }
} }
if(_vifRegs->offset == OFFSET_Y) if(vifRegs->offset == OFFSET_Y)
{ {
if (size > 0) if (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data++); writeXYZW(vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_Z; vifRegs->offset = OFFSET_Z;
size--; size--;
} }
} }
if(_vifRegs->offset == OFFSET_Z) if(vifRegs->offset == OFFSET_Z)
{ {
if (size > 0) if (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data++); writeXYZW(vifRegs->offset, *dest++, *data++);
_vifRegs->offset = OFFSET_W; vifRegs->offset = OFFSET_W;
size--; size--;
} }
} }
if(_vifRegs->offset == OFFSET_W) if(vifRegs->offset == OFFSET_W)
{ {
//V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
//Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
writeXYZW(_vifRegs->offset, *dest, *data); writeXYZW(vifRegs->offset, *dest, *data);
_vifRegs->offset = OFFSET_X; vifRegs->offset = OFFSET_X;
} }
} }
@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
{ {
while (size > 0) while (size > 0)
{ {
writeXYZW(_vifRegs->offset, *dest++, *data++); writeXYZW(vifRegs->offset, *dest++, *data++);
_vifRegs->offset++; vifRegs->offset++;
size--; size--;
} }
if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
} }
void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
return ret; return ret;
} }
static __forceinline int mfifoVIF1chain() static __forceinline int mfifo_VIF1chain()
{ {
int ret; int ret;
@ -531,7 +530,7 @@ void vifMFIFOInterrupt()
{ {
g_vifCycles = 0; g_vifCycles = 0;
if (vif1.inprogress == 1) mfifoVIF1chain(); if (vif1.inprogress == 1) mfifo_VIF1chain();
if (vif1.irq && vif1.tag.size == 0) if (vif1.irq && vif1.tag.size == 0)
{ {

View File

@ -81,9 +81,9 @@ struct VIFregisters {
extern "C" extern "C"
{ {
// these use cdecl for Asm code references. // these use cdecl for Asm code references.
extern VIFregisters *_vifRegs; extern VIFregisters *vifRegs;
extern u32* _vifMaskRegs; extern u32* vifMaskRegs;
extern u32* _vifRow; extern u32* vifRow;
extern u32* _vifCol; extern u32* _vifCol;
} }
@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
switch (reg) switch (reg)
{ {
case 0: case 0:
_vifRegs->r0 = data; vifRegs->r0 = data;
break; break;
case 1: case 1:
_vifRegs->r1 = data; vifRegs->r1 = data;
break; break;
case 2: case 2:
_vifRegs->r2 = data; vifRegs->r2 = data;
break; break;
case 3: case 3:
_vifRegs->r3 = data; vifRegs->r3 = data;
break; break;
jNO_DEFAULT; jNO_DEFAULT;
} }
@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg)
switch (reg) switch (reg)
{ {
case 0: case 0:
return _vifRegs->r0; return vifRegs->r0;
break; break;
case 1: case 1:
return _vifRegs->r1; return vifRegs->r1;
break; break;
case 2: case 2:
return _vifRegs->r2; return vifRegs->r2;
break; break;
case 3: case 3:
return _vifRegs->r3; return vifRegs->r3;
break; break;
jNO_DEFAULT; jNO_DEFAULT;
} }
@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data)
switch (reg) switch (reg)
{ {
case 0: case 0:
_vifRegs->c0 = data; vifRegs->c0 = data;
break; break;
case 1: case 1:
_vifRegs->c1 = data; vifRegs->c1 = data;
break; break;
case 2: case 2:
_vifRegs->c2 = data; vifRegs->c2 = data;
break; break;
case 3: case 3:
_vifRegs->c3 = data; vifRegs->c3 = data;
break; break;
jNO_DEFAULT; jNO_DEFAULT;
} }
@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg)
switch (reg) switch (reg)
{ {
case 0: case 0:
return _vifRegs->c0; return vifRegs->c0;
break; break;
case 1: case 1:
return _vifRegs->c1; return vifRegs->c1;
break; break;
case 2: case 2:
return _vifRegs->c2; return vifRegs->c2;
break; break;
case 3: case 3:
return _vifRegs->c3; return vifRegs->c3;
break; break;
jNO_DEFAULT; jNO_DEFAULT;
} }

View File

@ -37,10 +37,10 @@ using namespace std; // for min / max
extern "C" extern "C"
{ {
// Need cdecl on these for ASM references. // Need cdecl on these for ASM references.
extern VIFregisters *_vifRegs; extern VIFregisters *vifRegs;
extern u32* _vifMaskRegs; extern u32* vifMaskRegs;
extern u32* _vifRow; extern u32* vifRow;
extern u32* _vifCol; extern u32* vifCol;
} }
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);
@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);
extern vifStruct *_vif; extern vifStruct *vif;
vifStruct vif0, vif1; vifStruct vif0, vif1;
@ -254,21 +254,9 @@ __forceinline static int _limit(int a, int max)
static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum) static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
{ {
const VIFUnpackFuncTable *unpack; const VIFUnpackFuncTable *unpack;
vifStruct *vif;
VIFregisters *vifRegs;
unpack = &VIFfuncTable[ unpackType ]; unpack = &VIFfuncTable[ unpackType ];
if (VIFdmanum == 0)
{
vif = &vif0;
vifRegs = vif0Regs;
}
else
{
vif = &vif1;
vifRegs = vif1Regs;
}
switch (unpackType) switch (unpackType)
{ {
case 0x0: case 0x0:
@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
} }
//This is sorted out later //This is sorted out later
vif->tag.addr &= ~0xf; if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
{
VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
}
} }
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{ {
u32 *dest; u32 *dest;
u32 unpackType; u32 unpackType;
UNPACKFUNCTYPE func; UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft; const VIFUnpackFuncTable *ft;
vifStruct *vif;
VIFregisters *vifRegs;
VURegs * VU; VURegs * VU;
u8 *cdata = (u8*)data; u8 *cdata = (u8*)data;
#ifdef _DEBUG #ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000; u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif #endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0) if (VIFdmanum == 0)
{ {
VU = &VU0; VU = &VU0;
vif = &vif0;
vifRegs = vif0Regs;
assert(v->addr < memsize); assert(v->addr < memsize);
} }
else else
{ {
VU = &VU1; VU = &VU1;
vif = &vif1;
vifRegs = vif1Regs;
assert(v->addr < memsize); assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
} }
dest = (u32*)(VU->Mem + v->addr); dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
#ifdef _DEBUG
if (v->size != size)
{
VIF_LOG("*PCSX2*: warning v->size != size");
}
if ((v->addr + size*4) > memsize)
{
Console::Notice("*PCSX2*: fixme unpack overflow");
Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
}
#endif
// The unpack type // The unpack type
unpackType = v->cmd & 0xf; unpackType = v->cmd & 0xf;
if (size == 0)
{
VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
}
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
_vifRegs = (VIFregisters*)vifRegs;
_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
_vif = vif;
_vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
ft = &VIFfuncTable[ unpackType ]; ft = &VIFfuncTable[ unpackType ];
func = _vif->usn ? ft->funcU : ft->funcS; func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2; size <<= 2;
@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
memsize = size; memsize = size;
#endif #endif
if (v->size != (size >> 2)) if(vif1Regs->offset != 0)
ProcessMemSkip(size, unpackType, VIFdmanum); {
int unpacksize;
if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000))
{
//Sanity Check (memory overflow)
DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
}
if (_vifRegs->offset > 0)
{
int destinc, unpacksize;
//This is just to make sure the alignment isnt loopy on a split packet //This is just to make sure the alignment isnt loopy on a split packet
if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
{ {
DevCon::Error("Warning: Unpack alignment error"); DevCon::Error("Warning: Unpack alignment error");
} }
@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
// SSE doesn't handle such small data
if (vifRegs->offset < (u32)ft->qsize) if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
{ {
if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
{ }
Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
}
unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset)); unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
}
else
{
unpacksize = 0;
Console::WriteLn("Unpack align offset = 0");
}
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
destinc = (4 - ft->qsize) + unpacksize;
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
func(dest, (u32*)cdata, unpacksize); func(dest, (u32*)cdata, unpacksize);
size -= unpacksize * ft->dsize; size -= unpacksize * ft->dsize;
cdata += unpacksize * ft->dsize;
vifRegs->num--; vifRegs->num--;
++vif->cl; ++vif->cl;
if (vif->cl == vifRegs->cycle.wl) if (vif->cl == vifRegs->cycle.wl)
{ {
if (vifRegs->cycle.cl != vifRegs->cycle.wl) if (vifRegs->cycle.cl != vifRegs->cycle.wl)
dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; {
vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
//dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
}
else else
dest += destinc; {
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
//dest += destinc;
}
vif->cl = 0; vif->cl = 0;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
return size >> 2;
} }
else else
{ {
dest += destinc; vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
dest += (4 - ft->qsize) + unpacksize;
cdata += unpacksize * ft->dsize;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
} }
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
} }
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write if (vif->cl != 0) //Check alignment for SSE unpacks
{ {
#ifdef _DEBUG #ifdef _DEBUG
@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
int incdest; int incdest;
if (vif->cl != 0) if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{ {
// continuation from last stream // continuation from last stream
@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (vif->cl == vifRegs->cycle.wl) if (vif->cl == vifRegs->cycle.wl)
{ {
dest += incdest; dest += incdest;
vif->tag.addr += incdest * 4;
vif->cl = 0; vif->cl = 0;
break; break;
} }
dest += 4; dest += 4;
vif->tag.addr += 16;
} }
// have to update if(vifRegs->mode == 2)
_vifRow[0] = _vifRegs->r0; {
_vifRow[1] = _vifRegs->r1; //Update the reg rows for SSE
_vifRow[2] = _vifRegs->r2; vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
_vifRow[3] = _vifRegs->r3; vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
} }
}
return size>>2;
}
if ((size >= ft->gsize) && !(v->addr&0xf))
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{
u32 *dest;
u32 unpackType;
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
VURegs * VU;
u8 *cdata = (u8*)data;
#ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0)
{
VU = &VU0;
//vifRegs = vif0Regs;
assert(v->addr < memsize);
}
else
{
VU = &VU1;
//vifRegs = vif1Regs;
assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
}
dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
// The unpack type
unpackType = v->cmd & 0xf;
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
ft = &VIFfuncTable[ unpackType ];
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
#ifdef _DEBUG
memsize = size;
#endif
#ifdef VIFUNPACKDEBUG
if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
{
//Sanity Check (memory overflow)
DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
}
#endif
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{
#ifdef _DEBUG
static int s_count = 0;
#endif
if (size >= ft->gsize)
{ {
const UNPACKPARTFUNCTYPESSE* pfn; const UNPACKPARTFUNCTYPESSE* pfn;
int writemask; int writemask;
@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle; if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;
if(vifRegs->mode == 2)
{
//Update the reg rows for non SSE
vifRegs->r0 = vifRow[0];
vifRegs->r1 = vifRow[1];
vifRegs->r2 = vifRow[2];
vifRegs->r3 = vifRow[3];
}
// if size is left over, update the src,dst pointers // if size is left over, update the src,dst pointers
if (writemask > 0) if (writemask > 0)
{ {
@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
cdata += left * ft->gsize; cdata += left * ft->gsize;
dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16); dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
vifRegs->num -= left; vifRegs->num -= left;
_vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = writemask;
if (size >= ft->dsize && vifRegs->num > 0)
{
//VIF_LOG("warning, end with size = %d", size);
/* unpack one qword */
vif->tag.addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
}
} }
else else
{ {
vifRegs->num -= size / ft->gsize; vifRegs->num -= size / ft->gsize;
if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = 0;
} }
size = writemask; }
else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
_vifRegs->r0 = _vifRow[0];
_vifRegs->r1 = _vifRow[1];
_vifRegs->r2 = _vifRow[2];
_vifRegs->r3 = _vifRow[3];
}
else
{
if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available
{
// v4-32
if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
{
vifRegs->num -= size >> 4;
memcpy_fast((u8*)dest, cdata, size);
size = 0;
return;
}
}
incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
while ((size >= ft->gsize) && (vifRegs->num > 0))
{
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
size -= ft->gsize;
vifRegs->num--;
//if(vifRegs->num == loophere) dest = (u32*)(VU->Mem);
++vif->cl;
if (vif->cl == vifRegs->cycle.wl)
{
dest += incdest;
vif->cl = 0;
}
else
{
dest += 4;
}
}
// have to update
_vifRow[0] = _vifRegs->r0;
_vifRow[1] = _vifRegs->r1;
_vifRow[2] = _vifRegs->r2;
_vifRow[3] = _vifRegs->r3;
}
// used for debugging vif
// {
// int i, j, k;
// u32* curdest = olddest;
// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
//
// for(i = 0; i < memsize; ) {
// for(k = 0; k < vifRegs->cycle.wl; ++k) {
// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
// fprintf(ftemp, "%x ", curdest[4*k+j]);
// }
// }
//
// fprintf(ftemp, "\n");
// curdest += 4*vifRegs->cycle.cl;
// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
// }
// fclose(ftemp);
// }
// s_count++;
if (size >= ft->dsize && vifRegs->num > 0)
{ {
//VIF_LOG("warning, end with size = %d", size); //VIF_LOG("warning, end with size = %d", size);
@ -679,14 +665,20 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
vif->tag.addr += (size / ft->dsize) * 4; vif->tag.addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize); func(dest, (u32*)cdata, size / ft->dsize);
size = 0; size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
} }
} }
else /* filling write */ else /* filling write */
{ {
VIF_LOG("VIFunpack - filling write");
if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data)
vif0.tag.addr &= 0xfff; vif0.tag.addr &= 0xfff;
vif0.tag.size = len; vif0.tag.size = len;
vif0Regs->offset = 0; vif0Regs->offset = 0;
vifRegs = (VIFregisters*)vif0Regs;
vifMaskRegs = g_vif0Masks;
vif = &vif0;
vifRow = g_vifRow0;
} }
static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
{ {
/* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size); /* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
{ {
FILE *f = fopen("vu1.raw", "wb"); FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f); fwrite(data, 1, size*4, f);
@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{ {
if (vif0.vifpacketsize < vif0.tag.size) if (vif0.vifpacketsize < vif0.tag.size)
{ {
_vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
vif0.tag.addr += vif0.vifpacketsize << 2; vif0.tag.addr += vif0.vifpacketsize << 2;
vif0.tag.size -= vif0.vifpacketsize; vif0.tag.size -= vif0.vifpacketsize;
return vif0.vifpacketsize; return vif0.vifpacketsize;
@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{ {
int ret; int ret;
_vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
ret = vif0.tag.size; ret = vif0.tag.size;
vif0.tag.size = 0; vif0.tag.size = 0;
vif0.cmd = 0; vif0.cmd = 0;
@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
{ {
/* size is less that the total size, transfer is 'in pieces' */ /* size is less that the total size, transfer is 'in pieces' */
VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);
ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);
vif0.tag.size -= vif0.vifpacketsize; vif0.tag.size -= vif0.vifpacketsize;
FreezeXMMRegs(0); FreezeXMMRegs(0);
return vif0.vifpacketsize; return vif0.vifpacketsize;
@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
else else
{ {
/* we got all the data, transfer it fully */ /* we got all the data, transfer it fully */
int ret; int ret = vif0.tag.size;
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); //Align data after a split transfer first
ret = vif0.tag.size; if(vif0Regs->offset != 0 || vif0.cl != 0)
vif0.tag.size = 0; {
vif0.cmd = 0; vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
FreezeXMMRegs(0); data += ret - vif0.tag.size;
return ret; if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
} }
} }
@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data)
vif1.cl = 0; vif1.cl = 0;
vif1.tag.addr <<= 4; vif1.tag.addr <<= 4;
vif1.tag.cmd = vif1.cmd; vif1.tag.cmd = vif1.cmd;
vifRegs = (VIFregisters*)vif1Regs;
vifMaskRegs = g_vif1Masks;
vif = &vif1;
vifRow = g_vifRow1;
} }
static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
{ {
/* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size); /* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
{ {
FILE *f = fopen("vu1.raw", "wb"); FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f); fwrite(data, 1, size*4, f);
@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
{ {
if (vif1.vifpacketsize < vif1.tag.size) if (vif1.vifpacketsize < vif1.tag.size)
{ {
_vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
vif1.tag.addr += vif1.vifpacketsize << 2; vif1.tag.addr += vif1.vifpacketsize << 2;
vif1.tag.size -= vif1.vifpacketsize; vif1.tag.size -= vif1.vifpacketsize;
return vif1.vifpacketsize; return vif1.vifpacketsize;
@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
else else
{ {
int ret; int ret;
_vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
ret = vif1.tag.size; ret = vif1.tag.size;
vif1.tag.size = 0; vif1.tag.size = 0;
vif1.cmd = 0; vif1.cmd = 0;
@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data)
/* size is less that the total size, transfer is /* size is less that the total size, transfer is
'in pieces' */ 'in pieces' */
VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);
ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
vif1.tag.size -= vif1.vifpacketsize; vif1.tag.size -= vif1.vifpacketsize;
FreezeXMMRegs(0); FreezeXMMRegs(0);
return vif1.vifpacketsize; return vif1.vifpacketsize;
} }
else else
{ {
int ret; int ret = vif1.tag.size;
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); if(vif1Regs->offset != 0 || vif1.cl != 0)
ret = vif1.tag.size; {
vif1.tag.size = 0; vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.cmd = 0; data += ret - vif1.tag.size;
FreezeXMMRegs(0); if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
return ret; vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
} }
} }

View File

@ -18,9 +18,9 @@
*/ */
.intel_syntax noprefix .intel_syntax noprefix
.extern _vifRegs .extern vifRegs
.extern _vifMaskRegs .extern vifMaskRegs
.extern _vifRow .extern vifRow
#define VIF_ESP esp #define VIF_ESP esp
#define VIF_SRC esi #define VIF_SRC esi
@ -108,7 +108,7 @@
// setting up masks // setting up masks
#define UNPACK_Setup_Mask_SSE(CL) \ #define UNPACK_Setup_Mask_SSE(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \ mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
@ -118,7 +118,7 @@
#define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL) #define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
#define UNPACK_Start_Setup_Mask_SSE_1(CL) \ #define UNPACK_Start_Setup_Mask_SSE_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \ mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
pand XMM_ROWMASK, XMM_ROW; \ pand XMM_ROWMASK, XMM_ROW; \
@ -129,12 +129,12 @@
#define UNPACK_Setup_Mask_SSE_0_1(CL) #define UNPACK_Setup_Mask_SSE_0_1(CL)
#define UNPACK_Setup_Mask_SSE_1_1(CL) \ #define UNPACK_Setup_Mask_SSE_1_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \ mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
// ignore CL, since vif.cycle.wl == 1 // ignore CL, since vif.cycle.wl == 1
#define UNPACK_Setup_Mask_SSE_2_1(CL) \ #define UNPACK_Setup_Mask_SSE_2_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \ mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
@ -1312,9 +1312,9 @@
#pragma warning(disable:4731) #pragma warning(disable:4731)
#define SAVE_ROW_REG_BASE \ #define SAVE_ROW_REG_BASE \
mov VIF_TMPADDR, _vifRow; \ mov VIF_TMPADDR, vifRow; \
movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \ movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
mov VIF_TMPADDR, _vifRegs; \ mov VIF_TMPADDR, vifRegs; \
movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \ movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
psrldq XMM_ROW, 4; \ psrldq XMM_ROW, 4; \
movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \ movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \
@ -1349,7 +1349,7 @@
.globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \ .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \ UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
INIT_ARGS(); \ INIT_ARGS(); \
mov VIF_TMPADDR, _vifRegs; \ mov VIF_TMPADDR, vifRegs; \
movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \ movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \ movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
sub VIF_INC, VIF_SAVEEBX; \ sub VIF_INC, VIF_SAVEEBX; \

View File

@ -5,9 +5,9 @@
.xmm .xmm
extern _vifRegs:ptr extern vifRegs:ptr
extern _vifMaskRegs:ptr extern vifMaskRegs:ptr
extern _vifRow:ptr extern vifRow:ptr
extern s_TempDecompress:ptr extern s_TempDecompress:ptr
@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0
UNPACK_Setup_Mask_SSE macro CL UNPACK_Setup_Mask_SSE macro CL
mov eax, [_vifMaskRegs] mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32] movdqa xmm5, [eax + 64*(CL) + 32]
movdqa xmm3, [eax + 64*(CL)] movdqa xmm3, [eax + 64*(CL)]
@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
endm endm
UNPACK_Start_Setup_Mask_SSE_1 macro CL UNPACK_Start_Setup_Mask_SSE_1 macro CL
mov eax, [_vifMaskRegs] mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32] movdqa xmm5, [eax + 64*(CL) + 32]
pand xmm4, xmm6 pand xmm4, xmm6
@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
UNPACK_Setup_Mask_SSE_0_1 macro CL UNPACK_Setup_Mask_SSE_0_1 macro CL
endm endm
UNPACK_Setup_Mask_SSE_1_1 macro CL UNPACK_Setup_Mask_SSE_1_1 macro CL
mov eax, [_vifMaskRegs] mov eax, [vifMaskRegs]
movdqa xmm3, [eax + 64*(0)] movdqa xmm3, [eax + 64*(0)]
endm endm
UNPACK_Setup_Mask_SSE_2_1 macro CL UNPACK_Setup_Mask_SSE_2_1 macro CL
mov eax, [_vifMaskRegs] mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(0) + 16] movdqa xmm4, [eax + 64*(0) + 16]
movdqa xmm5, [eax + 64*(0) + 32] movdqa xmm5, [eax + 64*(0) + 32]
movdqa xmm3, [eax + 64*(0)] movdqa xmm3, [eax + 64*(0)]
@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType
SAVE_ROW_REG_BASE macro SAVE_ROW_REG_BASE macro
mov eax, [_vifRow] mov eax, [vifRow]
movdqa [eax], xmm6 movdqa [eax], xmm6
mov eax, [_vifRegs] mov eax, [vifRegs]
movss dword ptr [eax+0100h], xmm6 movss dword ptr [eax+0100h], xmm6
psrldq xmm6, 4 psrldq xmm6, 4
movss dword ptr [eax+0110h], xmm6 movss dword ptr [eax+0110h], xmm6
@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
push ebx push ebx
INIT_ARGS INIT_ARGS
mov eax, [_vifRegs] mov eax, [vifRegs]
movzx ecx, byte ptr [eax + 040h] movzx ecx, byte ptr [eax + 040h]
movzx ebx, byte ptr [eax + 041h] movzx ebx, byte ptr [eax + 041h]
sub ecx, ebx sub ecx, ebx