mirror of https://github.com/PCSX2/pcsx2.git
Optimized and split up the unpack call a bit so fewer checks are run, which should bring an overall speed increase. Also got rid of some duplicate pointer rubbish that was scattered all over the place.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288
parent a388d2969c
commit 4df3f80d30
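The "duplicate pointer rubbish" is the pattern removed throughout the diff below: every helper re-selected the active VIF unit from VIFdmanum before doing any work, while the new code stores the unit pointers in the shared globals once, when the UNPACK command is decoded. A minimal sketch of that change, with stand-in types and names rather than the real PCSX2 sources:

    // Illustrative sketch only; the real code uses vifStruct/VIFregisters from Vif.h.
    struct Unit { int cl; };
    struct Regs { unsigned int offset; };

    static Unit unit0, unit1;
    static Regs regs0, regs1;

    // Shared globals read directly by the unpack helpers (mirrors vif / vifRegs in the diff).
    static Unit *vif;
    static Regs *vifRegs;

    // Old shape: every helper repeated this selection.
    static void helper_old(unsigned int VIFdmanum)
    {
        Unit *u = (VIFdmanum == 0) ? &unit0 : &unit1;   // duplicated in each function
        Regs *r = (VIFdmanum == 0) ? &regs0 : &regs1;   // duplicated in each function
        (void)u; (void)r;                               // ... work ...
    }

    // New shape: select once when the UNPACK command arrives; helpers just use the globals.
    static void onUnpackCommand(unsigned int VIFdmanum)
    {
        vif     = (VIFdmanum == 0) ? &unit0 : &unit1;
        vifRegs = (VIFdmanum == 0) ? &regs0 : &regs1;
    }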
@@ -25,10 +25,10 @@
 #include "Vif.h"
 #include "VifDma.h"

-VIFregisters *_vifRegs;
-u32* _vifRow = NULL, *_vifCol = NULL;
-u32* _vifMaskRegs = NULL;
-vifStruct *_vif;
+VIFregisters *vifRegs;
+u32* vifRow = NULL, *vifCol = NULL;
+u32* vifMaskRegs = NULL;
+vifStruct *vif;

 PCSX2_ALIGNED16(u32 g_vifRow0[4]);
 PCSX2_ALIGNED16(u32 g_vifCol0[4]);
@@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
     int n;
     u32 vifRowReg = getVifRowRegs(offnum);

-    if (_vifRegs->code & 0x10000000)
+    if (vifRegs->code & 0x10000000)
     {
-        switch (_vif->cl)
+        switch (vif->cl)
         {
             case 0:
                 if (offnum == OFFSET_X)
-                    n = (_vifRegs->mask) & 0x3;
+                    n = (vifRegs->mask) & 0x3;
                 else
-                    n = (_vifRegs->mask >> (offnum * 2)) & 0x3;
+                    n = (vifRegs->mask >> (offnum * 2)) & 0x3;
                 break;
             case 1:
-                n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
+                n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
                 break;
             case 2:
-                n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
+                n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
                 break;
             default:
-                n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
+                n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
                 break;
         }
     }
@@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
     switch (n)
     {
         case 0:
-            if ((_vif->cmd & 0x6F) == 0x6f)
+            if ((vif->cmd & 0x6F) == 0x6f)
             {
                 dest = data;
             }
-            else switch (_vifRegs->mode)
+            else switch (vifRegs->mode)
             {
                 case 1:
                     dest = data + vifRowReg;
@@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
                     dest = vifRowReg;
                     break;
                 case 2:
-                    dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl);
+                    dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
                     break;
                 case 3:
-                    //Masked so don't do anything
                     break;
             }
-//    VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data);
+//    VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
 }

 template <class T>
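The writeXYZW hunks above decode a 2-bit field per output column from the unpack mask: which byte of the mask applies depends on the current write cycle (vif->cl), and the field value selects between writing the unpacked data, the row register, the column register, or leaving the destination untouched. A compact sketch of just that decode, mirroring the switch above but not the PCSX2 function itself:

    // Minimal sketch of the 2-bit write-mask decode used by writeXYZW (not the PCSX2 source).
    #include <cstdint>

    // Returns the 2-bit mask field for column 'offnum' (0..3 = X..W) on cycle 'cl'.
    // Cycles 0, 1 and 2 use mask bits 0-7, 8-15 and 16-23; every later cycle reuses bits 24-31.
    static uint32_t maskField(uint32_t mask, uint32_t offnum, uint32_t cl)
    {
        uint32_t row = (cl > 2) ? 3 : cl;          // cycles past 2 share the last byte of bits
        return (mask >> (row * 8 + offnum * 2)) & 0x3;
    }
    // 0 -> write the unpacked data, 1 -> write the row register,
    // 2 -> write the column register, 3 -> masked, leave the destination untouched.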
@@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size)
 template <class T>
 void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
 {
-    if (_vifRegs->offset == OFFSET_X)
+    if (vifRegs->offset == OFFSET_X)
     {
         if (size > 0)
         {
-            writeXYZW(_vifRegs->offset, *dest++, *data++);
-            _vifRegs->offset = OFFSET_Y;
+            writeXYZW(vifRegs->offset, *dest++, *data++);
+            vifRegs->offset = OFFSET_Y;
             size--;
         }
     }

-    if (_vifRegs->offset == OFFSET_Y)
+    if (vifRegs->offset == OFFSET_Y)
     {
         if (size > 0)
         {
-            writeXYZW(_vifRegs->offset, *dest++, *data);
-            _vifRegs->offset = OFFSET_Z;
+            writeXYZW(vifRegs->offset, *dest++, *data);
+            vifRegs->offset = OFFSET_Z;
             size--;
         }
     }

-    if (_vifRegs->offset == OFFSET_Z)
+    if (vifRegs->offset == OFFSET_Z)
     {
-        writeXYZW(_vifRegs->offset, *dest++, *dest-2);
-        _vifRegs->offset = OFFSET_W;
+        writeXYZW(vifRegs->offset, *dest++, *dest-2);
+        vifRegs->offset = OFFSET_W;
     }

-    if (_vifRegs->offset == OFFSET_W)
+    if (vifRegs->offset == OFFSET_W)
     {
-        writeXYZW(_vifRegs->offset, *dest, *data);
-        _vifRegs->offset = OFFSET_X;
+        writeXYZW(vifRegs->offset, *dest, *data);
+        vifRegs->offset = OFFSET_X;
     }
 }

 template <class T>
 void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
 {
-    if(_vifRegs->offset == OFFSET_X)
+    if(vifRegs->offset == OFFSET_X)
     {
         if (size > 0)
         {
-            writeXYZW(_vifRegs->offset, *dest++, *data++);
-            _vifRegs->offset = OFFSET_Y;
+            writeXYZW(vifRegs->offset, *dest++, *data++);
+            vifRegs->offset = OFFSET_Y;
             size--;
         }
     }

-    if(_vifRegs->offset == OFFSET_Y)
+    if(vifRegs->offset == OFFSET_Y)
     {
         if (size > 0)
         {
-            writeXYZW(_vifRegs->offset, *dest++, *data++);
-            _vifRegs->offset = OFFSET_Z;
+            writeXYZW(vifRegs->offset, *dest++, *data++);
+            vifRegs->offset = OFFSET_Z;
             size--;
         }
     }

-    if(_vifRegs->offset == OFFSET_Z)
+    if(vifRegs->offset == OFFSET_Z)
     {
         if (size > 0)
         {
-            writeXYZW(_vifRegs->offset, *dest++, *data++);
-            _vifRegs->offset = OFFSET_W;
+            writeXYZW(vifRegs->offset, *dest++, *data++);
+            vifRegs->offset = OFFSET_W;
             size--;
         }
     }

-    if(_vifRegs->offset == OFFSET_W)
+    if(vifRegs->offset == OFFSET_W)
     {
         //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
         //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
-        writeXYZW(_vifRegs->offset, *dest, *data);
-        _vifRegs->offset = OFFSET_X;
+        writeXYZW(vifRegs->offset, *dest, *data);
+        vifRegs->offset = OFFSET_X;
     }
 }

@@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
 {
     while (size > 0)
     {
-        writeXYZW(_vifRegs->offset, *dest++, *data++);
-        _vifRegs->offset++;
+        writeXYZW(vifRegs->offset, *dest++, *data++);
+        vifRegs->offset++;
         size--;
     }

-    if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X;
+    if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
 }

 void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
@@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
     return ret;
 }

-static __forceinline int mfifoVIF1chain()
+static __forceinline int mfifo_VIF1chain()
 {
     int ret;

@@ -531,7 +530,7 @@ void vifMFIFOInterrupt()
 {
     g_vifCycles = 0;

-    if (vif1.inprogress == 1) mfifoVIF1chain();
+    if (vif1.inprogress == 1) mfifo_VIF1chain();

     if (vif1.irq && vif1.tag.size == 0)
     {
pcsx2/Vif.h (38 changed lines)
@@ -81,9 +81,9 @@ struct VIFregisters {
 extern "C"
 {
     // these use cdecl for Asm code references.
-    extern VIFregisters *_vifRegs;
-    extern u32* _vifMaskRegs;
-    extern u32* _vifRow;
+    extern VIFregisters *vifRegs;
+    extern u32* vifMaskRegs;
+    extern u32* vifRow;
     extern u32* _vifCol;
 }

@@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
     switch (reg)
     {
         case 0:
-            _vifRegs->r0 = data;
+            vifRegs->r0 = data;
             break;
         case 1:
-            _vifRegs->r1 = data;
+            vifRegs->r1 = data;
             break;
         case 2:
-            _vifRegs->r2 = data;
+            vifRegs->r2 = data;
             break;
         case 3:
-            _vifRegs->r3 = data;
+            vifRegs->r3 = data;
             break;
         jNO_DEFAULT;
     }
@@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg)
     switch (reg)
     {
         case 0:
-            return _vifRegs->r0;
+            return vifRegs->r0;
             break;
         case 1:
-            return _vifRegs->r1;
+            return vifRegs->r1;
             break;
         case 2:
-            return _vifRegs->r2;
+            return vifRegs->r2;
             break;
         case 3:
-            return _vifRegs->r3;
+            return vifRegs->r3;
             break;
         jNO_DEFAULT;
     }
@@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data)
     switch (reg)
     {
         case 0:
-            _vifRegs->c0 = data;
+            vifRegs->c0 = data;
             break;
         case 1:
-            _vifRegs->c1 = data;
+            vifRegs->c1 = data;
             break;
         case 2:
-            _vifRegs->c2 = data;
+            vifRegs->c2 = data;
             break;
         case 3:
-            _vifRegs->c3 = data;
+            vifRegs->c3 = data;
             break;
         jNO_DEFAULT;
     }
@@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg)
     switch (reg)
     {
         case 0:
-            return _vifRegs->c0;
+            return vifRegs->c0;
             break;
         case 1:
-            return _vifRegs->c1;
+            return vifRegs->c1;
             break;
         case 2:
-            return _vifRegs->c2;
+            return vifRegs->c2;
             break;
         case 3:
-            return _vifRegs->c3;
+            return vifRegs->c3;
             break;
         jNO_DEFAULT;
     }
pcsx2/VifDma.cpp (453 changed lines)
@@ -37,10 +37,10 @@ using namespace std; // for min / max
 extern "C"
 {
     // Need cdecl on these for ASM references.
-    extern VIFregisters *_vifRegs;
-    extern u32* _vifMaskRegs;
-    extern u32* _vifRow;
-    extern u32* _vifCol;
+    extern VIFregisters *vifRegs;
+    extern u32* vifMaskRegs;
+    extern u32* vifRow;
+    extern u32* vifCol;
 }

 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);
@@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);

-extern vifStruct *_vif;
+extern vifStruct *vif;

 vifStruct vif0, vif1;

@@ -254,20 +254,8 @@ __forceinline static int _limit(int a, int max)
 static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
 {
     const VIFUnpackFuncTable *unpack;
-    vifStruct *vif;
-    VIFregisters *vifRegs;
-    unpack = &VIFfuncTable[ unpackType ];

-    if (VIFdmanum == 0)
-    {
-        vif = &vif0;
-        vifRegs = vif0Regs;
-    }
-    else
-    {
-        vif = &vif1;
-        vifRegs = vif1Regs;
-    }
+    unpack = &VIFfuncTable[ unpackType ];

     switch (unpackType)
     {
@@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
     }

     //This is sorted out later
-    vif->tag.addr &= ~0xf;
+    if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
+    {
+        VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
+        vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
+    }

 }

-static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 {
     u32 *dest;
     u32 unpackType;
     UNPACKFUNCTYPE func;
     const VIFUnpackFuncTable *ft;
-    vifStruct *vif;
-    VIFregisters *vifRegs;
     VURegs * VU;
     u8 *cdata = (u8*)data;

 #ifdef _DEBUG
     u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
 #endif

-    _mm_prefetch((char*)data, _MM_HINT_NTA);
-
     if (VIFdmanum == 0)
     {
         VU = &VU0;
-        vif = &vif0;
-        vifRegs = vif0Regs;
         assert(v->addr < memsize);
     }
     else
     {
         VU = &VU1;
-        vif = &vif1;
-        vifRegs = vif1Regs;
         assert(v->addr < memsize);
-
-        if (vu1MicroIsSkipping())
-        {
-            // don't process since the frame is dummy
-            vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
-            return;
-        }
     }

     dest = (u32*)(VU->Mem + v->addr);

-    VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+    VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
         VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);

-    VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
-
-#ifdef _DEBUG
-    if (v->size != size)
-    {
-        VIF_LOG("*PCSX2*: warning v->size != size");
-    }
-
-    if ((v->addr + size*4) > memsize)
-    {
-        Console::Notice("*PCSX2*: fixme unpack overflow");
-        Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
-            params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
-    }
-#endif
-
     // The unpack type
     unpackType = v->cmd & 0xf;

-    if (size == 0)
-    {
-        VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
-    }
-
-    _mm_prefetch((char*)data + 128, _MM_HINT_NTA);
-
-    _vifRegs = (VIFregisters*)vifRegs;
-    _vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
-    _vif = vif;
-    _vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
     ft = &VIFfuncTable[ unpackType ];
-    func = _vif->usn ? ft->funcU : ft->funcS;
+    func = vif->usn ? ft->funcU : ft->funcS;

     size <<= 2;

@@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
     memsize = size;
 #endif

-    if (v->size != (size >> 2))
-        ProcessMemSkip(size, unpackType, VIFdmanum);
-
-    if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000))
-    {
-        //Sanity Check (memory overflow)
-        DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
-    }
-
-    if (_vifRegs->offset > 0)
+    if(vif1Regs->offset != 0)
     {
-        int destinc, unpacksize;
+        int unpacksize;

         //This is just to make sure the alignment isnt loopy on a split packet
-        if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
+        if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
         {
             DevCon::Error("Warning: Unpack alignment error");
         }
@@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma

         if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
             VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
-        // SSE doesn't handle such small data

-        if (vifRegs->offset < (u32)ft->qsize)
+        if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
         {
-            if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
-            {
-                Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
-            }
-            unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
+            DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
         }
-        else
-        {
-            unpacksize = 0;
-            Console::WriteLn("Unpack align offset = 0");
-        }
+        unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
+
         VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
-        destinc = (4 - ft->qsize) + unpacksize;

         func(dest, (u32*)cdata, unpacksize);
         size -= unpacksize * ft->dsize;
-        cdata += unpacksize * ft->dsize;

         vifRegs->num--;
         ++vif->cl;

         if (vif->cl == vifRegs->cycle.wl)
         {
             if (vifRegs->cycle.cl != vifRegs->cycle.wl)
-                dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+            {
+                vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
+                //dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+            }
             else
-                dest += destinc;
+            {
+                vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+                //dest += destinc;
+            }
             vif->cl = 0;
+            VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
+            return size >> 2;
         }
         else
         {
-            dest += destinc;
+            vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+            dest += (4 - ft->qsize) + unpacksize;
+            cdata += unpacksize * ft->dsize;
+            VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
         }
-
-        VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
-
     }

-    if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
+    if (vif->cl != 0) //Check alignment for SSE unpacks
     {

 #ifdef _DEBUG
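The new VIFalign shown above only finishes the quadword that a previous split packet left half-written: ProcessMemSkip now snaps the tag address to (addr & ~0xf) + offset*4, and VIFalign unpacks at most qsize - offset elements before handing the remaining size back to the caller for the normal path. A hedged sketch of that size bookkeeping, with stand-in types (the real function also advances vif->tag.addr and the cl counter):

    // Sketch only; 'FuncTable' mirrors the dsize/qsize fields of VIFUnpackFuncTable in the diff.
    struct FuncTable { int dsize; int qsize; };   // bytes per element, elements per column

    static int alignLeftover(int sizeBytes, unsigned int offset, const FuncTable& ft)
    {
        // Elements still needed to finish the current quadword, capped by what the packet holds.
        unsigned int want = (unsigned int)ft.qsize - offset;
        unsigned int have = (unsigned int)sizeBytes / (unsigned int)ft.dsize;
        unsigned int unpacksize = (have < want) ? have : want;

        // ... the real code calls func(dest, cdata, unpacksize) here ...

        return sizeBytes - (int)(unpacksize * (unsigned int)ft.dsize);  // bytes left for the fast path
    }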
@@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma

     int incdest;

-    if (vif->cl != 0)
+    if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
     {
         // continuation from last stream

@@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
             if (vif->cl == vifRegs->cycle.wl)
             {
                 dest += incdest;
+                vif->tag.addr += incdest * 4;
                 vif->cl = 0;
                 break;
             }

             dest += 4;
+            vif->tag.addr += 16;
         }

-        // have to update
-        _vifRow[0] = _vifRegs->r0;
-        _vifRow[1] = _vifRegs->r1;
-        _vifRow[2] = _vifRegs->r2;
-        _vifRow[3] = _vifRegs->r3;
+        if(vifRegs->mode == 2)
+        {
+            //Update the reg rows for SSE
+            vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
+            vifRow[0] = vifRegs->r0;
+            vifRow[1] = vifRegs->r1;
+            vifRow[2] = vifRegs->r2;
+            vifRow[3] = vifRegs->r3;
+        }

     }
+    }
+    return size>>2;
+}

-    if ((size >= ft->gsize) && !(v->addr&0xf))
+static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+{
+    u32 *dest;
+    u32 unpackType;
+    UNPACKFUNCTYPE func;
+    const VIFUnpackFuncTable *ft;
+    VURegs * VU;
+    u8 *cdata = (u8*)data;
+
+#ifdef _DEBUG
+    u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
+#endif
+
+    _mm_prefetch((char*)data, _MM_HINT_NTA);
+
+    if (VIFdmanum == 0)
+    {
+        VU = &VU0;
+        //vifRegs = vif0Regs;
+        assert(v->addr < memsize);
+    }
+    else
+    {
+        VU = &VU1;
+        //vifRegs = vif1Regs;
+        assert(v->addr < memsize);
+
+        if (vu1MicroIsSkipping())
+        {
+            // don't process since the frame is dummy
+            vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
+            return;
+        }
+    }
+
+    dest = (u32*)(VU->Mem + v->addr);
+
+    VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+        VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
+
+    VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
+
+    // The unpack type
+    unpackType = v->cmd & 0xf;
+
+    _mm_prefetch((char*)data + 128, _MM_HINT_NTA);
+
+    ft = &VIFfuncTable[ unpackType ];
+    func = vif->usn ? ft->funcU : ft->funcS;
+
+    size <<= 2;
+
+#ifdef _DEBUG
+    memsize = size;
+#endif
+
+#ifdef VIFUNPACKDEBUG
+
+    if((vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
+        ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
+    {
+        //Sanity Check (memory overflow)
+        DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
+    }
+#endif
+
+    if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
+    {
+
+#ifdef _DEBUG
+        static int s_count = 0;
+#endif
+
+        if (size >= ft->gsize)
         {
             const UNPACKPARTFUNCTYPESSE* pfn;
             int writemask;
@@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma

             if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;

+            if(vifRegs->mode == 2)
+            {
+                //Update the reg rows for non SSE
+                vifRegs->r0 = vifRow[0];
+                vifRegs->r1 = vifRow[1];
+                vifRegs->r2 = vifRow[2];
+                vifRegs->r3 = vifRow[3];
+            }
+
             // if size is left over, update the src,dst pointers
             if (writemask > 0)
             {
@@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
                 cdata += left * ft->gsize;
                 dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
                 vifRegs->num -= left;
-                _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+                vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+                size = writemask;
+
+                if (size >= ft->dsize && vifRegs->num > 0)
+                {
+                    //VIF_LOG("warning, end with size = %d", size);
+
+                    /* unpack one qword */
+                    vif->tag.addr += (size / ft->dsize) * 4;
+                    func(dest, (u32*)cdata, size / ft->dsize);
+                    size = 0;
+
+                    if(vifRegs->mode == 2)
+                    {
+                        //Update the reg rows for SSE
+                        vifRow[0] = vifRegs->r0;
+                        vifRow[1] = vifRegs->r1;
+                        vifRow[2] = vifRegs->r2;
+                        vifRow[3] = vifRegs->r3;
+                    }
+                    VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
+                }
             }
             else
             {
                 vifRegs->num -= size / ft->gsize;
-                if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+                if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+                size = 0;
             }

-            size = writemask;
-
-            _vifRegs->r0 = _vifRow[0];
-            _vifRegs->r1 = _vifRow[1];
-            _vifRegs->r2 = _vifRow[2];
-            _vifRegs->r3 = _vifRow[3];
         }
-        else
-        {
-            if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available
-            {
-                // v4-32
-                if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
-                {
-                    vifRegs->num -= size >> 4;
-                    memcpy_fast((u8*)dest, cdata, size);
-                    size = 0;
-                    return;
-                }
-            }
-
-            incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
-
-            while ((size >= ft->gsize) && (vifRegs->num > 0))
-            {
-                func(dest, (u32*)cdata, ft->qsize);
-                cdata += ft->gsize;
-                size -= ft->gsize;
-
-                vifRegs->num--;
-                //if(vifRegs->num == loophere) dest = (u32*)(VU->Mem);
-                ++vif->cl;
-                if (vif->cl == vifRegs->cycle.wl)
-                {
-                    dest += incdest;
-                    vif->cl = 0;
-                }
-                else
-                {
-                    dest += 4;
-                }
-            }
-
-            // have to update
-            _vifRow[0] = _vifRegs->r0;
-            _vifRow[1] = _vifRegs->r1;
-            _vifRow[2] = _vifRegs->r2;
-            _vifRow[3] = _vifRegs->r3;
-        }
-
-        // used for debugging vif
-//        {
-//            int i, j, k;
-//            u32* curdest = olddest;
-//            FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
-//            fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
-//            fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
-//            fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
-//            //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
-//
-//            for(i = 0; i < memsize; ) {
-//                for(k = 0; k < vifRegs->cycle.wl; ++k) {
-//                    for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
-//                        fprintf(ftemp, "%x ", curdest[4*k+j]);
-//                    }
-//                }
-//
-//                fprintf(ftemp, "\n");
-//                curdest += 4*vifRegs->cycle.cl;
-//                i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
-//            }
-//            fclose(ftemp);
-//        }
-//        s_count++;
-
-        if (size >= ft->dsize && vifRegs->num > 0)
+        else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
         {
             //VIF_LOG("warning, end with size = %d", size);

@@ -680,13 +666,19 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
             func(dest, (u32*)cdata, size / ft->dsize);
             size = 0;

+            if(vifRegs->mode == 2)
+            {
+                //Update the reg rows for SSE
+                vifRow[0] = vifRegs->r0;
+                vifRow[1] = vifRegs->r1;
+                vifRow[2] = vifRegs->r2;
+                vifRow[3] = vifRegs->r3;
+            }
             VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
         }

     }
     else /* filling write */
     {
-        VIF_LOG("VIFunpack - filling write");

         if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
             DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
@@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data)
     vif0.tag.addr &= 0xfff;
     vif0.tag.size = len;
     vif0Regs->offset = 0;

+    vifRegs = (VIFregisters*)vif0Regs;
+    vifMaskRegs = g_vif0Masks;
+    vif = &vif0;
+    vifRow = g_vifRow0;
 }

-static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
 {
-    /* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size);
+    /* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
     {
         FILE *f = fopen("vu1.raw", "wb");
         fwrite(data, 1, size*4, f);
@@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
 {
     if (vif0.vifpacketsize < vif0.tag.size)
     {
-        _vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
+        vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
         vif0.tag.addr += vif0.vifpacketsize << 2;
         vif0.tag.size -= vif0.vifpacketsize;
         return vif0.vifpacketsize;
@@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
     {
         int ret;

-        _vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
+        vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
         ret = vif0.tag.size;
         vif0.tag.size = 0;
         vif0.cmd = 0;
@@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
     {
         /* size is less that the total size, transfer is 'in pieces' */
         VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);
+
+        ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);
+
         vif0.tag.size -= vif0.vifpacketsize;
         FreezeXMMRegs(0);
         return vif0.vifpacketsize;
@@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
     else
     {
         /* we got all the data, transfer it fully */
-        int ret;
+        int ret = vif0.tag.size;

-        VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
-        ret = vif0.tag.size;
-        vif0.tag.size = 0;
-        vif0.cmd = 0;
-        FreezeXMMRegs(0);
-        return ret;
+        //Align data after a split transfer first
+        if(vif0Regs->offset != 0 || vif0.cl != 0)
+        {
+            vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+            data += ret - vif0.tag.size;
+            if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+            vif0.tag.size = 0;
+            vif0.cmd = 0;
+            FreezeXMMRegs(0);
+            return ret;
+        }
+        else
+        {
+            VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+            vif0.tag.size = 0;
+            vif0.cmd = 0;
+            FreezeXMMRegs(0);
+            return ret;
+        }
     }
 }

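Both Vif0TransUnpack and Vif1TransUnpack now follow the same two-stage flow when the whole packet is available: run VIFalign first if a previous packet left offset or cl nonzero, advance the data pointer by however many words the alignment pass consumed, then let VIFunpack handle the remainder. A self-contained sketch of that control flow, using stand-in types and stub functions rather than the literal PCSX2 code:

    // Sketch of the split-transfer flow; alignPass/unpackPass stand in for VIFalign/VIFunpack.
    typedef unsigned int u32;

    struct Tag  { int size; };
    struct Regs { u32 offset; };
    struct Unit { int cl; };

    static int  alignPass (u32* data, Tag* tag, int size) { (void)data; (void)tag; return size; }
    static void unpackPass(u32* data, Tag* tag, int size) { (void)data; (void)tag; (void)size; }

    static int transferUnpack(u32* data, Tag& tag, const Regs& regs, const Unit& unit)
    {
        int ret = tag.size;                          // the whole packet is available here

        if (regs.offset != 0 || unit.cl != 0)        // a previous packet left a partial quadword
        {
            tag.size = alignPass(data, &tag, tag.size);  // finish that quadword first
            data += ret - tag.size;                      // skip whatever the align pass consumed
            if (tag.size > 0)
                unpackPass(data, &tag, tag.size);        // then run the normal (SSE) path
        }
        else
        {
            unpackPass(data, &tag, tag.size);
        }

        tag.size = 0;
        return ret;                                   // packet fully consumed either way
    }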
@@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data)
     vif1.cl = 0;
     vif1.tag.addr <<= 4;
     vif1.tag.cmd = vif1.cmd;

+    vifRegs = (VIFregisters*)vif1Regs;
+    vifMaskRegs = g_vif1Masks;
+    vif = &vif1;
+    vifRow = g_vifRow1;
 }

-static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
 {
-    /* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size);
+    /* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
     {
         FILE *f = fopen("vu1.raw", "wb");
         fwrite(data, 1, size*4, f);
@@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
 {
     if (vif1.vifpacketsize < vif1.tag.size)
     {
-        _vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
+        vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
         vif1.tag.addr += vif1.vifpacketsize << 2;
         vif1.tag.size -= vif1.vifpacketsize;
         return vif1.vifpacketsize;
@@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
     else
     {
         int ret;
-        _vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
+        vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
         ret = vif1.tag.size;
         vif1.tag.size = 0;
         vif1.cmd = 0;
@@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data)
         /* size is less that the total size, transfer is
            'in pieces' */
         VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);
+
+        ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
         vif1.tag.size -= vif1.vifpacketsize;
         FreezeXMMRegs(0);
         return vif1.vifpacketsize;
     }
     else
     {
-        int ret;
-        /* we got all the data, transfer it fully */
-        VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
-        ret = vif1.tag.size;
-        vif1.tag.size = 0;
-        vif1.cmd = 0;
-        FreezeXMMRegs(0);
-        return ret;
+        int ret = vif1.tag.size;
+
+        if(vif1Regs->offset != 0 || vif1.cl != 0)
+        {
+            vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+            data += ret - vif1.tag.size;
+            if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+            vif1.tag.size = 0;
+            vif1.cmd = 0;
+            FreezeXMMRegs(0);
+            return ret;
+        }
+        else
+        {
+            /* we got all the data, transfer it fully */
+            VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+            vif1.tag.size = 0;
+            vif1.cmd = 0;
+            FreezeXMMRegs(0);
+            return ret;
+        }
     }

 }
@@ -18,9 +18,9 @@
 */
 .intel_syntax noprefix

-.extern _vifRegs
-.extern _vifMaskRegs
-.extern _vifRow
+.extern vifRegs
+.extern vifMaskRegs
+.extern vifRow

 #define VIF_ESP esp
 #define VIF_SRC esi
|
||||||
|
|
||||||
// setting up masks
|
// setting up masks
|
||||||
#define UNPACK_Setup_Mask_SSE(CL) \
|
#define UNPACK_Setup_Mask_SSE(CL) \
|
||||||
mov VIF_TMPADDR, _vifMaskRegs; \
|
mov VIF_TMPADDR, vifMaskRegs; \
|
||||||
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
|
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
|
||||||
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
|
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
|
||||||
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
|
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
|
||||||
|
@ -118,7 +118,7 @@
|
||||||
|
|
||||||
#define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
|
#define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
|
||||||
#define UNPACK_Start_Setup_Mask_SSE_1(CL) \
|
#define UNPACK_Start_Setup_Mask_SSE_1(CL) \
|
||||||
mov VIF_TMPADDR, _vifMaskRegs; \
|
mov VIF_TMPADDR, vifMaskRegs; \
|
||||||
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
|
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
|
||||||
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
|
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
|
||||||
pand XMM_ROWMASK, XMM_ROW; \
|
pand XMM_ROWMASK, XMM_ROW; \
|
||||||
|
@ -129,12 +129,12 @@
|
||||||
|
|
||||||
#define UNPACK_Setup_Mask_SSE_0_1(CL)
|
#define UNPACK_Setup_Mask_SSE_0_1(CL)
|
||||||
#define UNPACK_Setup_Mask_SSE_1_1(CL) \
|
#define UNPACK_Setup_Mask_SSE_1_1(CL) \
|
||||||
mov VIF_TMPADDR, _vifMaskRegs; \
|
mov VIF_TMPADDR, vifMaskRegs; \
|
||||||
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
|
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
|
||||||
|
|
||||||
// ignore CL, since vif.cycle.wl == 1
|
// ignore CL, since vif.cycle.wl == 1
|
||||||
#define UNPACK_Setup_Mask_SSE_2_1(CL) \
|
#define UNPACK_Setup_Mask_SSE_2_1(CL) \
|
||||||
mov VIF_TMPADDR, _vifMaskRegs; \
|
mov VIF_TMPADDR, vifMaskRegs; \
|
||||||
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
|
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
|
||||||
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
|
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
|
||||||
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
|
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
|
||||||
|
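The 64*(CL) addressing in these macros implies the layout of the table that vifMaskRegs points to: one 64-byte entry per cycle position, with the value loaded into XMM_WRITEMASK at offset 0, XMM_ROWMASK at offset 16 and XMM_ROWCOLMASK at offset 32. A sketch of that layout as a C++ struct; the fourth 16-byte slot is an assumption, since only the first three offsets appear in this diff:

    #include <cstdint>

    struct alignas(16) VifMaskEntry
    {
        uint32_t writeMask[4];   // +0  : loaded into XMM_WRITEMASK
        uint32_t rowMask[4];     // +16 : loaded into XMM_ROWMASK
        uint32_t rowColMask[4];  // +32 : loaded into XMM_ROWCOLMASK
        uint32_t reserved[4];    // +48 : assumed, not referenced in this hunk
    };

    // One entry per cycle-length (CL) value, so entry CL lives at byte offset 64*CL.
    static_assert(sizeof(VifMaskEntry) == 64, "matches the 64-byte stride used by the asm");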
@@ -1312,9 +1312,9 @@
 #pragma warning(disable:4731)

 #define SAVE_ROW_REG_BASE \
-    mov VIF_TMPADDR, _vifRow; \
+    mov VIF_TMPADDR, vifRow; \
     movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
-    mov VIF_TMPADDR, _vifRegs; \
+    mov VIF_TMPADDR, vifRegs; \
     movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
     psrldq XMM_ROW, 4; \
     movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \
@@ -1349,7 +1349,7 @@
 .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
 UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
     INIT_ARGS(); \
-    mov VIF_TMPADDR, _vifRegs; \
+    mov VIF_TMPADDR, vifRegs; \
     movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
     movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
     sub VIF_INC, VIF_SAVEEBX; \
@@ -5,9 +5,9 @@
 .xmm


-extern _vifRegs:ptr
-extern _vifMaskRegs:ptr
-extern _vifRow:ptr
+extern vifRegs:ptr
+extern vifMaskRegs:ptr
+extern vifRow:ptr
 extern s_TempDecompress:ptr


@@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0


 UNPACK_Setup_Mask_SSE macro CL
-    mov eax, [_vifMaskRegs]
+    mov eax, [vifMaskRegs]
     movdqa xmm4, [eax + 64*(CL) + 16]
     movdqa xmm5, [eax + 64*(CL) + 32]
     movdqa xmm3, [eax + 64*(CL)]
@@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
 endm

 UNPACK_Start_Setup_Mask_SSE_1 macro CL
-    mov eax, [_vifMaskRegs]
+    mov eax, [vifMaskRegs]
     movdqa xmm4, [eax + 64*(CL) + 16]
     movdqa xmm5, [eax + 64*(CL) + 32]
     pand xmm4, xmm6
@@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
 UNPACK_Setup_Mask_SSE_0_1 macro CL
 endm
 UNPACK_Setup_Mask_SSE_1_1 macro CL
-    mov eax, [_vifMaskRegs]
+    mov eax, [vifMaskRegs]
     movdqa xmm3, [eax + 64*(0)]
 endm


 UNPACK_Setup_Mask_SSE_2_1 macro CL

-    mov eax, [_vifMaskRegs]
+    mov eax, [vifMaskRegs]
     movdqa xmm4, [eax + 64*(0) + 16]
     movdqa xmm5, [eax + 64*(0) + 32]
     movdqa xmm3, [eax + 64*(0)]
@@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType


 SAVE_ROW_REG_BASE macro
-    mov eax, [_vifRow]
+    mov eax, [vifRow]
     movdqa [eax], xmm6
-    mov eax, [_vifRegs]
+    mov eax, [vifRegs]
     movss dword ptr [eax+0100h], xmm6
     psrldq xmm6, 4
     movss dword ptr [eax+0110h], xmm6
@@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
     push ebx

     INIT_ARGS
-    mov eax, [_vifRegs]
+    mov eax, [vifRegs]
     movzx ecx, byte ptr [eax + 040h]
     movzx ebx, byte ptr [eax + 041h]
     sub ecx, ebx
