mirror of https://github.com/PCSX2/pcsx2.git
Removed the old vif unpack code since pcsx2 is now using newVif.

Notes to Devs:
- Linux project files probably need to be updated since I deleted some files.
- In the vif0/vif1 Freeze() functions for saved states, I kept some dummy vars to keep saved state compatibility. We should remove them next time we decide to break saved state compatibility.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2461 96395faa-99c1-11dd-bbfe-3dabce05a288
parent 7bf9a0d994
commit a8c2941901
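The "dummy vars" note above corresponds to the vif0Freeze()/vif1Freeze() hunks further down: the deleted globals are replaced by same-sized local arrays so Freeze() keeps serializing the same number of bytes and old saved states still load. A minimal sketch of that pattern, assuming only that Freeze(obj) reads/writes sizeof(obj) bytes of the state stream (the function name below is illustrative, not the exact pcsx2 code):

```cpp
// Sketch: preserve the saved-state layout after removing two globals.
void SaveStateBase::vifExampleFreeze()   // hypothetical name for illustration
{
    static u32 dummyHasMask3[4];   // same size as the removed g_vif0HasMask3
    static u32 dummyMasks[64];     // same size as the removed g_vif0Masks

    Freeze(vif0);            // real state
    Freeze(dummyHasMask3);   // not used anymore, kept only for layout compatibility
    Freeze(dummyMasks);      // not used anymore, kept only for layout compatibility
}
```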
@@ -31,7 +31,6 @@ void hwInit()
 	gsInit();
 	vif0Init();
 	vif1Init();
-	vifDmaInit();
 	sifInit();
 	sprInit();
 	ipuInit();
@@ -23,8 +23,6 @@
 #include "VifDma.h"
 
 VIFregisters *vifRegs;
-u32* vifRow = NULL;
-u32* vifMaskRegs = NULL;
 vifStruct *vif;
 u16 vifqwc = 0;
 
pcsx2/Vif.h | 27
@@ -105,7 +105,6 @@ union tVIF_STAT {
 	u32 _u32;
 
 	tVIF_STAT(u32 val) { _u32 = val; }
-
 	bool test(u32 flags) { return !!(_u32 & flags); }
 	void set_flags (u32 flags) { _u32 |= flags; }
 	void clear_flags(u32 flags) { _u32 &= ~flags; }
@@ -126,7 +125,6 @@ union tVIF_FBRST {
 	u32 _u32;
 
 	tVIF_FBRST(u32 val) { _u32 = val; }
-
 	bool test (u32 flags) { return !!(_u32 & flags); }
 	void set_flags (u32 flags) { _u32 |= flags; }
 	void clear_flags(u32 flags) { _u32 &= ~flags; }
@@ -146,7 +144,6 @@ union tVIF_ERR {
 	u32 _u32;
 
 	tVIF_ERR (u32 val) { _u32 = val; }
-
 	void write(u32 val) { _u32 = val; }
 	bool test (u32 flags) { return !!(_u32 & flags); }
 	void set_flags (u32 flags) { _u32 |= flags; }
@@ -214,14 +211,7 @@ struct VIFregisters {
 	u32 addr;
 };
 
-extern "C"
-{
-	// these use cdecl for Asm code references.
 extern VIFregisters *vifRegs;
-extern u32* vifMaskRegs;
-extern u32* vifRow;
-extern u32* _vifCol;
-}
 
 #define vif0RegsRef ((VIFregisters&)PS2MEM_HW[0x3800])
 #define vif1RegsRef ((VIFregisters&)PS2MEM_HW[0x3c00])
@@ -236,7 +226,7 @@ extern bool VIF1transfer(u32 *data, int size, bool istag);
 extern void vifMFIFOInterrupt();
 
 // --------------------------------------------------------------------------------------
-// VIF SSE-optimized Masking Mess
+// newVif SSE-optimized Row/Col Structs
 // --------------------------------------------------------------------------------------
 
 struct VifMaskTypes
@@ -245,19 +235,6 @@ struct VifMaskTypes
 	u32 Row1[4], Col1[4];
 };
 
-extern __aligned16 VifMaskTypes g_vifmask; // This struct is used by newVif as well as oldVif code...
+extern __aligned16 VifMaskTypes g_vifmask; // This struct is used by newVif
 
-extern void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask);
-
-#define XMM_R0 xmm0
-#define XMM_R1 xmm1
-#define XMM_R2 xmm2
-#define XMM_WRITEMASK xmm3
-#define XMM_ROWMASK xmm4
-#define XMM_ROWCOLMASK xmm5
-#define XMM_ROW xmm6
-#define XMM_COL xmm7
-
-#define XMM_R3 XMM_COL
-
 #endif /* __VIF_H__ */
@@ -18,13 +18,9 @@
 #include "Common.h"
 
 #include "VifDma_internal.h"
 
 #include "VUmicro.h"
 #include "newVif.h"
 
-__aligned16 u32 g_vif0Masks[64];
-u32 g_vif0HasMask3[4] = {0};
-
 extern int (__fastcall *Vif0TransTLB[128])(u32 *data);
 extern void (*Vif0CMDTLB[75])();
@@ -43,16 +39,7 @@ __forceinline void vif0FLUSH()
 
 void vif0Init()
 {
-    for (u32 i = 0; i < 256; ++i)
-    {
-        s_maskwrite[i] = ((i & 3) == 3) || ((i & 0xc) == 0xc) || ((i & 0x30) == 0x30) || ((i & 0xc0) == 0xc0);
-    }
-
-    SetNewMask(g_vif0Masks, g_vif0HasMask3, 0, 0xffffffff);
-
-#if newVif0
     initNewVif(0);
-#endif
 }
 
 static __forceinline void vif0UNPACK(u32 *data)
@@ -119,7 +106,6 @@ static int __fastcall Vif0TransNull(u32 *data) // Shouldnt go here
 
 static int __fastcall Vif0TransSTMask(u32 *data) // STMASK
 {
-    SetNewMask(g_vif0Masks, g_vif0HasMask3, data[0], vif0Regs->mask);
     vif0Regs->mask = data[0];
     VIF_LOG("STMASK == %x", vif0Regs->mask);
 
@@ -226,61 +212,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
 
 static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
 {
-#if newVif0
     return nVifUnpack(0, (u8*)data);
-#endif
-
-    int ret;
-
-    XMMRegisters::Freeze();
-    if (vif0.vifpacketsize < vif0.tag.size)
-    {
-        if(vif0Regs->offset != 0 || vif0.cl != 0)
-        {
-            ret = vif0.tag.size;
-            vif0.tag.size -= vif0.vifpacketsize - VIFalign<0>(data, &vif0.tag, vif0.vifpacketsize);
-            ret = ret - vif0.tag.size;
-            data += ret;
-
-            if(vif0.vifpacketsize > 0) VIFunpack<0>(data, &vif0.tag, vif0.vifpacketsize - ret);
-
-            ProcessMemSkip<0>((vif0.vifpacketsize - ret) << 2, (vif0.cmd & 0xf));
-            vif0.tag.size -= (vif0.vifpacketsize - ret);
-            XMMRegisters::Thaw();
-
-            return vif0.vifpacketsize;
-        }
-        /* size is less that the total size, transfer is 'in pieces' */
-        VIFunpack<0>(data, &vif0.tag, vif0.vifpacketsize);
-
-        ProcessMemSkip<0>(vif0.vifpacketsize << 2, (vif0.cmd & 0xf));
-
-        ret = vif0.vifpacketsize;
-        vif0.tag.size -= ret;
-    }
-    else
-    {
-        /* we got all the data, transfer it fully */
-        ret = vif0.tag.size;
-
-        //Align data after a split transfer first
-        if ((vif0Regs->offset != 0) || (vif0.cl != 0))
-        {
-            vif0.tag.size = VIFalign<0>(data, &vif0.tag, vif0.tag.size);
-            data += ret - vif0.tag.size;
-            if(vif0.tag.size > 0) VIFunpack<0>(data, &vif0.tag, vif0.tag.size);
-        }
-        else
-        {
-            VIFunpack<0>(data, &vif0.tag, vif0.tag.size);
-        }
-
-        vif0.tag.size = 0;
-        vif0.cmd = 0;
-    }
-
-    XMMRegisters::Thaw();
-    return ret;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -785,7 +717,6 @@ void vif0Reset()
     /* Reset the whole VIF, meaning the internal pcsx2 vars and all the registers */
     memzero(vif0);
     memzero(*vif0Regs);
-    SetNewMask(g_vif0Masks, g_vif0HasMask3, 0, 0xffffffff);
 
     psHu64(VIF0_FIFO) = 0;
     psHu64(VIF0_FIFO + 8) = 0;
@@ -795,13 +726,13 @@ void vif0Reset()
 
     vif0.done = true;
 
-#if newVif0
     resetNewVif(0);
-#endif
 }
 
 void SaveStateBase::vif0Freeze()
 {
+    static u32 g_vif0Masks[64];    // Dummy Var for saved state compatibility
+    static u32 g_vif0HasMask3[4];  // Dummy Var for saved state compatibility
     FreezeTag("VIFdma");
 
     // Dunno if this one is needed, but whatever, it's small. :)
@@ -811,6 +742,6 @@ void SaveStateBase::vif0Freeze()
     Freeze(g_vifmask);
 
     Freeze(vif0);
-    Freeze(g_vif0HasMask3);
-    Freeze(g_vif0Masks);
+    Freeze(g_vif0HasMask3); // Not Used Anymore
+    Freeze(g_vif0Masks);    // Not Used Anymore
 }
@@ -24,9 +24,6 @@
 #include "VUmicro.h"
 #include "newVif.h"
 
-__aligned16 u32 g_vif1Masks[64];
-u32 g_vif1HasMask3[4] = {0};
-
 extern void (*Vif1CMDTLB[82])();
 extern int (__fastcall *Vif1TransTLB[128])(u32 *data);
 
@@ -58,10 +55,7 @@ __forceinline void vif1FLUSH()
 
 void vif1Init()
 {
-    SetNewMask(g_vif1Masks, g_vif1HasMask3, 0, 0xffffffff);
-#if newVif1
     initNewVif(1);
-#endif
 }
 
 static __forceinline void vif1UNPACK(u32 *data)
@@ -136,7 +130,6 @@ static int __fastcall Vif1TransNull(u32 *data) // Shouldnt go here
 
 static int __fastcall Vif1TransSTMask(u32 *data) // STMASK
 {
-    SetNewMask(g_vif1Masks, g_vif1HasMask3, data[0], vif1Regs->mask);
     vif1Regs->mask = data[0];
     VIF_LOG("STMASK == %x", vif1Regs->mask);
 
@@ -318,57 +311,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 }
 static int __fastcall Vif1TransUnpack(u32 *data)
 {
-#if newVif1
     return nVifUnpack(1, (u8*)data);
-#endif
-
-    XMMRegisters::Freeze();
-
-    if (vif1.vifpacketsize < vif1.tag.size)
-    {
-        int ret = vif1.tag.size;
-        // size is less that the total size, transfer is 'in pieces'
-        if (vif1Regs->offset != 0 || vif1.cl != 0)
-        {
-            vif1.tag.size -= vif1.vifpacketsize - VIFalign<1>(data, &vif1.tag, vif1.vifpacketsize);
-            ret = ret - vif1.tag.size;
-            data += ret;
-            if ((vif1.vifpacketsize - ret) > 0) VIFunpack<1>(data, &vif1.tag, vif1.vifpacketsize - ret);
-            ProcessMemSkip<1>((vif1.vifpacketsize - ret) << 2, (vif1.cmd & 0xf));
-            vif1.tag.size -= (vif1.vifpacketsize - ret);
-        }
-        else
-        {
-            VIFunpack<1>(data, &vif1.tag, vif1.vifpacketsize);
-
-            ProcessMemSkip<1>(vif1.vifpacketsize << 2, (vif1.cmd & 0xf));
-            vif1.tag.size -= vif1.vifpacketsize;
-        }
-
-        XMMRegisters::Thaw();
-        return vif1.vifpacketsize;
-    }
-    else
-    {
-        int ret = vif1.tag.size;
-
-        if (vif1Regs->offset != 0 || vif1.cl != 0)
-        {
-            vif1.tag.size = VIFalign<1>(data, &vif1.tag, vif1.tag.size);
-            data += ret - vif1.tag.size;
-            if (vif1.tag.size > 0) VIFunpack<1>(data, &vif1.tag, vif1.tag.size);
-        }
-        else
-        {
-            /* we got all the data, transfer it fully */
-            VIFunpack<1>(data, &vif1.tag, vif1.tag.size);
-        }
-
-        vif1.tag.size = 0;
-        vif1.cmd = 0;
-        XMMRegisters::Thaw();
-        return ret;
-    }
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1175,7 +1118,6 @@ void vif1Reset()
     /* Reset the whole VIF, meaning the internal pcsx2 vars, and all the registers */
     memzero(vif1);
     memzero(*vif1Regs);
-    SetNewMask(g_vif1Masks, g_vif1HasMask3, 0, 0xffffffff);
 
     psHu64(VIF1_FIFO) = 0;
     psHu64(VIF1_FIFO + 8) = 0;
@@ -1186,15 +1128,15 @@ void vif1Reset()
     vif1.done = true;
     cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's
 
-#if newVif1
     resetNewVif(1);
-#endif
 }
 
 void SaveStateBase::vif1Freeze()
 {
+    static u32 g_vif1Masks[64];    // Dummy Var for saved state compatibility
+    static u32 g_vif1HasMask3[4];  // Dummy Var for saved state compatibility
     Freeze(vif1);
 
-    Freeze(g_vif1HasMask3);
-    Freeze(g_vif1Masks);
+    Freeze(g_vif1HasMask3); // Not Used Anymore
+    Freeze(g_vif1Masks);    // Not Used Anymore
 }
pcsx2/VifDma.cpp | 737
@@ -19,726 +19,7 @@
 #include "VifDma_internal.h"
 #include "VUmicro.h"
 
-#include <xmmintrin.h>
-#include <emmintrin.h>
-
-// Extern variables
-extern "C"
-{
-    // Need cdecl on these for ASM references.
-    extern VIFregisters *vifRegs;
-    extern u32* vifMaskRegs;
-    extern u32* vifRow;
-}
-
 int g_vifCycles = 0;
-u8 s_maskwrite[256];
-
-struct VIFSSEUnpackTable
-{
-    // regular 0, 1, 2; mask 0, 1, 2
-    UNPACKPARTFUNCTYPESSE funcU[9], funcS[9];
-};
-
-#define DECL_UNPACK_TABLE_SSE(name, sign) \
-extern "C" { \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Regular_0(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Regular_1(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Regular_2(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Mask_0(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Mask_1(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_Mask_2(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_WriteMask_0(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_WriteMask_1(u32* dest, u32* data, int dmasize); \
-    extern int UNPACK_SkippingWrite_##name##_##sign##_WriteMask_2(u32* dest, u32* data, int dmasize); \
-}
-
-#define _UNPACK_TABLE_SSE(name, sign) \
-    UNPACK_SkippingWrite_##name##_##sign##_Regular_0, \
-    UNPACK_SkippingWrite_##name##_##sign##_Regular_1, \
-    UNPACK_SkippingWrite_##name##_##sign##_Regular_2, \
-    UNPACK_SkippingWrite_##name##_##sign##_Mask_0, \
-    UNPACK_SkippingWrite_##name##_##sign##_Mask_1, \
-    UNPACK_SkippingWrite_##name##_##sign##_Mask_2, \
-    UNPACK_SkippingWrite_##name##_##sign##_WriteMask_0, \
-    UNPACK_SkippingWrite_##name##_##sign##_WriteMask_1, \
-    UNPACK_SkippingWrite_##name##_##sign##_WriteMask_2 \
-
-#define _UNPACK_TABLE_SSE_NULL \
-    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
-
-// Main table for function unpacking
-DECL_UNPACK_TABLE_SSE(S_32, u);
-DECL_UNPACK_TABLE_SSE(S_16, u);
-DECL_UNPACK_TABLE_SSE(S_8, u);
-DECL_UNPACK_TABLE_SSE(S_16, s);
-DECL_UNPACK_TABLE_SSE(S_8, s);
-
-DECL_UNPACK_TABLE_SSE(V2_32, u);
-DECL_UNPACK_TABLE_SSE(V2_16, u);
-DECL_UNPACK_TABLE_SSE(V2_8, u);
-DECL_UNPACK_TABLE_SSE(V2_16, s);
-DECL_UNPACK_TABLE_SSE(V2_8, s);
-
-DECL_UNPACK_TABLE_SSE(V3_32, u);
-DECL_UNPACK_TABLE_SSE(V3_16, u);
-DECL_UNPACK_TABLE_SSE(V3_8, u);
-DECL_UNPACK_TABLE_SSE(V3_16, s);
-DECL_UNPACK_TABLE_SSE(V3_8, s);
-
-DECL_UNPACK_TABLE_SSE(V4_32, u);
-DECL_UNPACK_TABLE_SSE(V4_16, u);
-DECL_UNPACK_TABLE_SSE(V4_8, u);
-DECL_UNPACK_TABLE_SSE(V4_16, s);
-DECL_UNPACK_TABLE_SSE(V4_8, s);
-DECL_UNPACK_TABLE_SSE(V4_5, u);
-
-static const VIFSSEUnpackTable VIFfuncTableSSE[16] =
-{
-    { _UNPACK_TABLE_SSE(S_32, u), _UNPACK_TABLE_SSE(S_32, u) },
-    { _UNPACK_TABLE_SSE(S_16, u), _UNPACK_TABLE_SSE(S_16, s) },
-    { _UNPACK_TABLE_SSE(S_8, u), _UNPACK_TABLE_SSE(S_8, s) },
-    { _UNPACK_TABLE_SSE_NULL, _UNPACK_TABLE_SSE_NULL },
-
-    { _UNPACK_TABLE_SSE(V2_32, u), _UNPACK_TABLE_SSE(V2_32, u) },
-    { _UNPACK_TABLE_SSE(V2_16, u), _UNPACK_TABLE_SSE(V2_16, s) },
-    { _UNPACK_TABLE_SSE(V2_8, u), _UNPACK_TABLE_SSE(V2_8, s) },
-    { _UNPACK_TABLE_SSE_NULL, _UNPACK_TABLE_SSE_NULL },
-
-    { _UNPACK_TABLE_SSE(V3_32, u), _UNPACK_TABLE_SSE(V3_32, u) },
-    { _UNPACK_TABLE_SSE(V3_16, u), _UNPACK_TABLE_SSE(V3_16, s) },
-    { _UNPACK_TABLE_SSE(V3_8, u), _UNPACK_TABLE_SSE(V3_8, s) },
-    { _UNPACK_TABLE_SSE_NULL, _UNPACK_TABLE_SSE_NULL },
-
-    { _UNPACK_TABLE_SSE(V4_32, u), _UNPACK_TABLE_SSE(V4_32, u) },
-    { _UNPACK_TABLE_SSE(V4_16, u), _UNPACK_TABLE_SSE(V4_16, s) },
-    { _UNPACK_TABLE_SSE(V4_8, u), _UNPACK_TABLE_SSE(V4_8, s) },
-    { _UNPACK_TABLE_SSE(V4_5, u), _UNPACK_TABLE_SSE(V4_5, u) },
-};
-
-void vifDmaInit()
-{
-}
-
-template void ProcessMemSkip<0>(u32 size, u32 unpackType);
-template void ProcessMemSkip<1>(u32 size, u32 unpackType);
-template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType)
-{
-    const VIFUnpackFuncTable *unpack;
-
-    // unpackType is only 0->0xf but that's ok, because the data we're using here is
-    // just duplicated in 0x10->0x1f.
-
-    unpack = &VIFfuncTable[ unpackType ];
-
-    switch (unpackType)
-    {
-        case 0x0:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing S-32 skip, size = %d", size);
-            break;
-        case 0x1:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing S-16 skip, size = %d", size);
-            break;
-        case 0x2:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing S-8 skip, size = %d", size);
-            break;
-        case 0x4:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V2-32 skip, size = %d", size);
-            break;
-        case 0x5:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V2-16 skip, size = %d", size);
-            break;
-        case 0x6:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size);
-            break;
-        case 0x8:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V3-32 skip, size = %d", size);
-            break;
-        case 0x9:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size);
-            break;
-        case 0xA:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V3-8 skip, size = %d", size);
-            break;
-        case 0xC:
-            vif->tag.addr += size;
-            VIFUNPACK_LOG("Processing V4-32 skip, size = %d, CL = %d, WL = %d", size, vifRegs->cycle.cl, vifRegs->cycle.wl);
-            break;
-        case 0xD:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V4-16 skip, size = %d", size);
-            break;
-        case 0xE:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V4-8 skip, size = %d", size);
-            break;
-        case 0xF:
-            vif->tag.addr += (size / unpack->gsize) * 16;
-            VIFUNPACK_LOG("Processing V4-5 skip, size = %d", size);
-            break;
-        default:
-            Console.WriteLn("Invalid unpack type %x", unpackType);
-            break;
-    }
-
-    //Append any skips in to the equation
-
-    if (vifRegs->cycle.cl > vifRegs->cycle.wl)
-    {
-        VIFUNPACK_LOG("Old addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl);
-        vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16);
-        VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl);
-    }
-
-    //This is sorted out later
-    if ((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
-    {
-        VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
-        vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
-    }
-
-    if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
-    {
-        vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-    }
-}
-
-template u32 VIFalign<0>(u32 *data, vifCode *v, u32 size);
-template u32 VIFalign<1>(u32 *data, vifCode *v, u32 size);
-template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
-{
-    u32 *dest;
-    VURegs * VU;
-    u8 *cdata = (u8*)data;
-
-    u32 memsize = vif_size(VIFdmanum);
-
-    if (VIFdmanum == 0)
-    {
-        VU = &VU0;
-        vifRegs = vif0Regs;
-        vifMaskRegs = g_vif0Masks;
-        vif = &vif0;
-        vifRow = g_vifmask.Row0;
-    }
-    else
-    {
-        VU = &VU1;
-        vifRegs = vif1Regs;
-        vifMaskRegs = g_vif1Masks;
-        vif = &vif1;
-        vifRow = g_vifmask.Row1;
-    }
-    pxAssume(v->addr < memsize);
-
-    dest = (u32*)(VU->Mem + v->addr);
-
-    VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
-            VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
-
-    const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
-    UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
-
-    size <<= 2;
-    memsize = size;
-
-    if(vifRegs->offset != 0)
-    {
-        int unpacksize;
-
-        //This is just to make sure the alignment isn't loopy on a split packet
-        if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
-        {
-            DevCon.Error("Warning: Unpack alignment error");
-        }
-
-        VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
-
-        if (((u32)size / (u32)ft.dsize) < ((u32)ft.qsize - vifRegs->offset))
-        {
-            DevCon.Error("Wasn't enough left size/dsize = %x left to write %x", (size / ft.dsize), (ft.qsize - vifRegs->offset));
-        }
-        unpacksize = min((size / ft.dsize), (ft.qsize - vifRegs->offset));
-
-
-        VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft.qsize) + unpacksize, vifRegs->offset);
-
-        (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, unpacksize);
-        size -= unpacksize * ft.dsize;
-
-        if(vifRegs->offset == 0)
-        {
-            vifRegs->num--;
-            ++vif->cl;
-        }
-        else
-        {
-            DevCon.Warning("Offset = %x", vifRegs->offset);
-            vif->tag.addr += unpacksize * 4;
-            return size>>2;
-        }
-
-        if (vif->cl == vifRegs->cycle.wl)
-        {
-            if (vifRegs->cycle.cl != vifRegs->cycle.wl)
-            {
-                vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft.qsize) + unpacksize)) * 4;
-                dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + (4 - ft.qsize) + unpacksize;
-            }
-            else
-            {
-                vif->tag.addr += ((4 - ft.qsize) + unpacksize) * 4;
-                dest += (4 - ft.qsize) + unpacksize;
-            }
-
-            if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
-            {
-                vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-                dest = (u32*)(VU->Mem + v->addr);
-            }
-
-            cdata += unpacksize * ft.dsize;
-            vif->cl = 0;
-            VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
-            if ((size & 0xf) == 0) return size >> 2;
-
-        }
-        else
-        {
-            vif->tag.addr += ((4 - ft.qsize) + unpacksize) * 4;
-            dest += (4 - ft.qsize) + unpacksize;
-
-            if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
-            {
-                vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-                dest = (u32*)(VU->Mem + v->addr);
-            }
-
-            cdata += unpacksize * ft.dsize;
-            VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
-        }
-    }
-
-    if (vif->cl != 0 || (size & 0xf)) //Check alignment for SSE unpacks
-    {
-        int incdest;
-
-        if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
-        {
-            if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
-            {
-                vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-                dest = (u32*)(VU->Mem + v->addr);
-            }
-            // continuation from last stream
-            VIFUNPACK_LOG("Continuing last stream size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
-            incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
-
-            while ((size >= ft.gsize) && (vifRegs->num > 0))
-            {
-                func(dest, (u32*)cdata);
-                cdata += ft.gsize;
-                size -= ft.gsize;
-
-                vifRegs->num--;
-                ++vif->cl;
-                if (vif->cl == vifRegs->cycle.wl)
-                {
-                    dest += incdest;
-                    vif->tag.addr += incdest * 4;
-
-                    vif->cl = 0;
-                    if ((size & 0xf) == 0) break;
-                }
-                else
-                {
-                    dest += 4;
-                    vif->tag.addr += 16;
-                }
-
-                if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
-                {
-                    vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-                    dest = (u32*)(VU->Mem + v->addr);
-                }
-            }
-
-            if(vifRegs->mode == 2)
-            {
-                //Update the reg rows for SSE
-                vifRow = VIFdmanum ? g_vifmask.Row1 : g_vifmask.Row0;
-                vifRow[0] = vifRegs->r0;
-                vifRow[1] = vifRegs->r1;
-                vifRow[2] = vifRegs->r2;
-                vifRow[3] = vifRegs->r3;
-            }
-
-        }
-        if (size >= ft.dsize && vifRegs->num > 0 && ((size & 0xf) != 0 || vif->cl != 0))
-        {
-            //VIF_LOG("warning, end with size = %d", size);
-            /* unpack one qword */
-            if(vif->tag.addr + ((size / ft.dsize) * 4) >= (u32)vif_size(VIFdmanum))
-            {
-                //DevCon.Warning("Overflow");
-                vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
-                dest = (u32*)(VU->Mem + v->addr);
-            }
-
-            vif->tag.addr += (size / ft.dsize) * 4;
-
-            (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
-            size = 0;
-
-            if(vifRegs->mode == 2)
-            {
-                //Update the reg rows for SSE
-                vifRow[0] = vifRegs->r0;
-                vifRow[1] = vifRegs->r1;
-                vifRow[2] = vifRegs->r2;
-                vifRow[3] = vifRegs->r3;
-            }
-            VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
-        }
-    }
-    return size>>2;
-}
-#include "newVif.h"
-#if !newVif
-template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
-template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
-template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
-{
-    //DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
-    u32 *dest;
-    VURegs * VU;
-    u8 *cdata = (u8*)data;
-    u32 tempsize = 0;
-    const u32 memlimit = vif_size(VIFdmanum);
-
-    pxDebugCode( u32 memsize = memlimit );
-
-    _mm_prefetch((char*)data, _MM_HINT_NTA);
-
-    if (VIFdmanum == 0)
-    {
-        VU = &VU0;
-        vifRegs = vif0Regs;
-        vifMaskRegs = g_vif0Masks;
-        vif = &vif0;
-        vifRow = g_vifmask.Row0;
-        pxDebugCode( pxAssume(v->addr < memsize) );
-    }
-    else
-    {
-
-        VU = &VU1;
-        vifRegs = vif1Regs;
-        vifMaskRegs = g_vif1Masks;
-        vif = &vif1;
-        vifRow = g_vifmask.Row1;
-        pxDebugCode( pxAssume(v->addr < memsize) );
-    }
-
-    dest = (u32*)(VU->Mem + v->addr);
-
-    VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
-            VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
-
-    VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
-
-    _mm_prefetch((char*)data + 128, _MM_HINT_NTA);
-
-    const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
-    UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
-
-    size <<= 2;
-
-    pxDebugCode( memsize = size );
-
-    if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
-    {
-        if (v->addr >= memlimit)
-        {
-            //DevCon.Warning("Overflown at the start");
-            v->addr &= (memlimit - 1);
-            dest = (u32*)(VU->Mem + v->addr);
-        }
-
-        size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
-
-        tempsize = vif->tag.addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
-                   (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
-
-        /*tempsize = vif->tag.addr + (((size / (ft.gsize * vifRegs->cycle.wl)) *
-                   (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);*/
-
-        //Sanity Check (memory overflow)
-        if (tempsize > memlimit)
-        {
-            if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
-                    ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize)))
-            {
-                //It's a red herring, so ignore it! SSE unpacks will be much quicker.
-                tempsize = 0;
-            }
-            else
-            {
-                //DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
-                tempsize = size;
-                size = 0;
-            }
-        }
-        else
-        {
-#ifndef NON_SSE_UNPACKS
-            tempsize = 0;
-#else
-            tempsize = size;
-            size = 0;
-#endif
-        }
-
-        if (size >= ft.gsize)
-        {
-            const UNPACKPARTFUNCTYPESSE* pfn;
-            int writemask;
-            u32 oldcycle = -1;
-
-            // yay evil .. let's just set some XMM registers in the middle of C code
-            // and "hope" they get preserved, in spite of the fact that x86-32 ABI specifies
-            // these as "clobberable" registers (so any printf or something could decide to
-            // clobber them, and has every right to... >_<) --air
-
-#ifdef _MSC_VER
-            if (VIFdmanum)
-            {
-                __asm movaps XMM_ROW, xmmword ptr [g_vifmask.Row1]
-                __asm movaps XMM_COL, xmmword ptr [g_vifmask.Col1]
-            }
-            else
-            {
-                __asm movaps XMM_ROW, xmmword ptr [g_vifmask.Row0]
-                __asm movaps XMM_COL, xmmword ptr [g_vifmask.Col0]
-            }
-#else
-            // I'd add volatile to these, but what's the point? This code already breaks
-            // like 5000 coveted rules of binary interfacing regardless, and is only working by
-            // the miracles and graces of a profound deity (or maybe it doesn't -- linux port
-            // *does* have stability issues, especially in GCC 4.4). --air
-            if (VIFdmanum)
-            {
-                __asm__(".intel_syntax noprefix\n"
-                        "movaps xmm6, xmmword ptr [%[Row1]]\n"
-                        "movaps xmm7, xmmword ptr [%[Col1]]\n"
-                        ".att_syntax\n" : : [Row1]"r"(g_vifmask.Row1), [Col1]"r"(g_vifmask.Col1));
-            }
-            else
-            {
-                __asm__(".intel_syntax noprefix\n"
-                        "movaps xmm6, xmmword ptr [%[Row0]]\n"
-                        "movaps xmm7, xmmword ptr [%[Col0]]\n"
-                        ".att_syntax\n" : : [Row0]"r"(g_vifmask.Row0), [Col0]"r"(g_vifmask.Col0));
-            }
-#endif
-
-            if ((vifRegs->cycle.cl == 0) || (vifRegs->cycle.wl == 0) ||
-                    ((vifRegs->cycle.cl == vifRegs->cycle.wl) && !(vifRegs->code & 0x10000000)))
-            {
-                oldcycle = *(u32*) & vifRegs->cycle;
-                vifRegs->cycle.cl = vifRegs->cycle.wl = 1;
-            }
-
-            pfn = vif->usn ? VIFfuncTableSSE[v->cmd & 0xf].funcU : VIFfuncTableSSE[v->cmd & 0xf].funcS;
-            writemask = VIFdmanum ? g_vif1HasMask3[min(vifRegs->cycle.wl,(u8)3)] : g_vif0HasMask3[min(vifRegs->cycle.wl,(u8)3)];
-            writemask = pfn[(((vifRegs->code & 0x10000000)>>28)<<writemask)*3+vifRegs->mode](dest, (u32*)cdata, size);
-
-            if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;
-
-            if(vifRegs->mode == 2)
-            {
-                //Update the reg rows for non SSE
-                vifRegs->r0 = vifRow[0];
-                vifRegs->r1 = vifRow[1];
-                vifRegs->r2 = vifRow[2];
-                vifRegs->r3 = vifRow[3];
-            }
-
-            // if size is left over, update the src,dst pointers
-            if (writemask > 0)
-            {
-                int left = (size - writemask) / ft.gsize;
-                cdata += left * ft.gsize;
-                dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
-                vifRegs->num -= left;
-                vif->cl = (size % (ft.gsize * vifRegs->cycle.wl)) / ft.gsize;
-                size = writemask;
-
-                if (size >= ft.dsize && vifRegs->num > 0)
-                {
-                    VIF_LOG("warning, end with size = %d", size);
-
-                    /* unpack one qword */
-                    //vif->tag.addr += (size / ft.dsize) * 4;
-                    (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
-                    size = 0;
-
-                    if(vifRegs->mode == 2)
-                    {
-                        //Update the reg rows for SSE
-                        vifRow[0] = vifRegs->r0;
-                        vifRow[1] = vifRegs->r1;
-                        vifRow[2] = vifRegs->r2;
-                        vifRow[3] = vifRegs->r3;
-                    }
-                    VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
-                }
-            }
-            else
-            {
-                vifRegs->num -= size / ft.gsize;
-                if (vifRegs->num > 0) vif->cl = (size % (ft.gsize * vifRegs->cycle.wl)) / ft.gsize;
-                size = 0;
-            }
-        }
-        else if(tempsize)
-        {
-            int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
-            size = 0;
-            int addrstart = v->addr;
-
-#ifndef NON_SSE_UNPACKS // spams pointlessly when SSE unpacks are disabled
-            //if((tempsize >> 2) != vif->tag.size) DevCon.Warning("split when size != tagsize");
-#endif
-
-            VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr);
-
-            while ((tempsize >= ft.gsize) && (vifRegs->num > 0))
-            {
-                if(v->addr >= memlimit)
-                {
-                    DevCon.Warning("Mem limit overflow");
-                    v->addr &= (memlimit - 1);
-                    dest = (u32*)(VU->Mem + v->addr);
-                }
-
-                func(dest, (u32*)cdata);
-                cdata += ft.gsize;
-                tempsize -= ft.gsize;
-
-                vifRegs->num--;
-                ++vif->cl;
-
-                if (vif->cl == vifRegs->cycle.wl)
-                {
-                    dest += incdest;
-                    v->addr += (incdest * 4);
-                    vif->cl = 0;
-                }
-                else
-                {
-                    dest += 4;
-                    v->addr += 16;
-                }
-            }
-
-            if (vifRegs->mode == 2)
-            {
-                //Update the reg rows for SSE
-                vifRow[0] = vifRegs->r0;
-                vifRow[1] = vifRegs->r1;
-                vifRow[2] = vifRegs->r2;
-                vifRow[3] = vifRegs->r3;
-            }
-
-            if (v->addr >= memlimit)
-            {
-                v->addr &= (memlimit - 1);
-                dest = (u32*)(VU->Mem + v->addr);
-            }
-
-            v->addr = addrstart;
-            if(tempsize > 0) size = tempsize;
-        }
-
-        if (size >= ft.dsize && vifRegs->num > 0) //Else write what we do have
-        {
-            VIF_LOG("warning, end with size = %d", size);
-
-            /* unpack one qword */
-            //vif->tag.addr += (size / ft.dsize) * 4;
-            (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
-            size = 0;
-
-            if(vifRegs->mode == 2)
-            {
-                //Update the reg rows for SSE
-                vifRow[0] = vifRegs->r0;
-                vifRow[1] = vifRegs->r1;
-                vifRow[2] = vifRegs->r2;
-                vifRow[3] = vifRegs->r3;
-            }
-            VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
-        }
-    }
-    else /* filling write */
-    {
-
-        if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
-            if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
-                DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
-
-        //DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
-        while (vifRegs->num > 0)
-        {
-            if (vif->cl == vifRegs->cycle.wl)
-            {
-                vif->cl = 0;
-            }
-
-            if (vif->cl < vifRegs->cycle.cl) /* unpack one qword */
-            {
-                if(size < ft.gsize)
-                {
-                    VIF_LOG("Out of Filling write data");
-                    break;
-                }
-
-                func(dest, (u32*)cdata);
-                cdata += ft.gsize;
-                size -= ft.gsize;
-
-                vif->cl++;
-                vifRegs->num--;
-
-                if (vif->cl == vifRegs->cycle.wl)
-                {
-                    vif->cl = 0;
-                }
-            }
-            else
-            {
-                func(dest, (u32*)cdata);
-                vif->tag.addr += 16;
-                vifRegs->num--;
-                ++vif->cl;
-
-            }
-            dest += 4;
-            if (vifRegs->num == 0) break;
-        }
-    }
-}
-#endif // #if !newVif
 
 template void vuExecMicro<0>(u32 addr);
 template void vuExecMicro<1>(u32 addr);
@@ -746,13 +27,11 @@ template<const u32 VIFdmanum> void vuExecMicro(u32 addr)
 {
     VURegs * VU;
 
-    if (VIFdmanum == 0)
-    {
+    if (VIFdmanum == 0) {
         VU = &VU0;
         vif0FLUSH();
     }
-    else
-    {
+    else {
         VU = &VU1;
         vif1FLUSH();
     }
@@ -768,22 +47,18 @@ template<const u32 VIFdmanum> void vuExecMicro(u32 addr)
         VU->vifRegs->top = VU->vifRegs->tops & 0x3ff;
 
         /* is DBF flag set in VIF_STAT? */
-        if (VU->vifRegs->stat.DBF)
-        {
+        if (VU->vifRegs->stat.DBF) {
             /* it is, so set tops with base, and clear the stat DBF flag */
             VU->vifRegs->tops = VU->vifRegs->base;
             VU->vifRegs->stat.DBF = false;
         }
-        else
-        {
+        else {
             /* it is not, so set tops with base + offset, and set stat DBF flag */
             VU->vifRegs->tops = VU->vifRegs->base + VU->vifRegs->ofst;
             VU->vifRegs->stat.DBF = true;
         }
     }
 
-    if (VIFdmanum == 0)
-        vu0ExecMicro(addr);
-    else
-        vu1ExecMicro(addr);
+    if (!VIFdmanum) vu0ExecMicro(addr);
+    else            vu1ExecMicro(addr);
 }
@@ -47,8 +47,6 @@ extern vifStruct vif0, vif1;
 extern u8 schedulepath3msk;
 static const int VifCycleVoodoo = 4;
 
-extern void vifDmaInit();
-
 extern void vif0Init();
 extern void vif0Interrupt();
 extern void vif0Write32(u32 mem, u32 value);
@@ -65,36 +65,16 @@ struct VIFUnpackFuncTable
 
 extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
 
-extern __aligned16 u32 g_vif0Masks[64], g_vif1Masks[64];
-extern u32 g_vif0HasMask3[4], g_vif1HasMask3[4];
 extern int g_vifCycles;
-extern u8 s_maskwrite[256];
 extern vifStruct *vif;
 
-template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType);
-template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size);
 template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size);
 template<const u32 VIFdmanum> void vuExecMicro(u32 addr);
 extern void vif0FLUSH();
 extern void vif1FLUSH();
 
-static __forceinline u32 vif_size(u8 num)
-{
-    return (num == 0) ? 0x1000 : 0x4000;
-}
-
-// All defines are enabled with '1' or disabled with '0'
-
-#define newVif 1 // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
-#define newVif1 1 // Use New Code for Vif1 Unpacks (needs newVif defined)
-#define newVif0 1 // Use New Code for Vif0 Unpacks (needs newVif defined)
-
-#if newVif
 extern int nVifUnpack (int idx, u8 *data);
 extern void initNewVif (int idx);
 extern void resetNewVif(int idx);
-#else
-//# define NON_SSE_UNPACKS // Turns off SSE Unpacks (slower)
-#endif
 
 #endif
@@ -828,18 +828,6 @@
 				RelativePath="..\..\VIFunpack.cpp"
 			>
 			</File>
-			<Filter
-				Name="Dynarec"
-			>
-				<File
-					RelativePath="..\..\x86\ix86-32\aVif_proc-32.asm"
-				>
-				</File>
-				<File
-					RelativePath="..\..\x86\iVif.cpp"
-				>
-				</File>
-			</Filter>
 			<Filter
 				Name="newVif"
 			>
@@ -16,8 +16,6 @@
 #include "PrecompiledHeader.h"
 #include "VifUnpackSSE.h"
 
-#if newVif
-
 #define xMOV8(regX, loc) xMOVSSZX(regX, loc)
 #define xMOV16(regX, loc) xMOVSSZX(regX, loc)
 #define xMOV32(regX, loc) xMOVSSZX(regX, loc)
@@ -38,6 +36,30 @@ void mergeVectors(int dest, int src, int temp, int xyzw) {
     }
 }
 
+// Loads Row/Col Data from vifRegs instead of g_vifmask
+// Useful for testing vifReg and g_vifmask inconsistency.
+void loadRowCol(nVifStruct& v) {
+    xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
+    xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
+    xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
+    xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
+    xPSHUF.D(xmm0, xmm0, _v0);
+    xPSHUF.D(xmm1, xmm1, _v0);
+    xPSHUF.D(xmm2, xmm2, _v0);
+    xPSHUF.D(xmm6, xmm6, _v0);
+    mVUmergeRegs(XMM6, XMM0, 8);
+    mVUmergeRegs(XMM6, XMM1, 4);
+    mVUmergeRegs(XMM6, XMM2, 2);
+    xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
+    xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
+    xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
+    xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
+    xPSHUF.D(xmm2, xmm2, _v0);
+    xPSHUF.D(xmm3, xmm3, _v0);
+    xPSHUF.D(xmm4, xmm4, _v0);
+    xPSHUF.D(xmm5, xmm5, _v0);
+}
+
 // =====================================================================================================
 // VifUnpackSSE_Base Section
 // =====================================================================================================
@@ -286,5 +308,3 @@ void VifUnpackSSE_Init()
 
     HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
 }
-
-#endif
@@ -24,9 +24,8 @@
 
 using namespace x86Emitter;
 
-#if newVif
-
 extern void mergeVectors(int dest, int src, int temp, int xyzw);
+extern void loadRowCol(nVifStruct& v);
 
 // --------------------------------------------------------------------------------------
 // VifUnpackSSE_Base
@@ -143,4 +142,4 @@ protected:
         return fillingWrite;
     }
 };
-#endif
@@ -20,8 +20,6 @@
 #include "PrecompiledHeader.h"
 #include "VifUnpackSSE.h"
 
-#if newVif
-
 static __aligned16 nVifBlock _vBlock = {0};
 static __pagealigned u8 nVifMemCmp[__pagesize];
 
@@ -39,30 +37,6 @@ void dVifClose(int idx) {
     safe_delete(nVif[idx].vifBlocks);
 }
 
-// Loads Row/Col Data from vifRegs instead of g_vifmask
-// Useful for testing vifReg and g_vifmask inconsistency.
-static void loadRowCol(nVifStruct& v) {
-    xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
-    xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
-    xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
-    xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
-    xPSHUF.D(xmm0, xmm0, _v0);
-    xPSHUF.D(xmm1, xmm1, _v0);
-    xPSHUF.D(xmm2, xmm2, _v0);
-    xPSHUF.D(xmm6, xmm6, _v0);
-    mVUmergeRegs(XMM6, XMM0, 8);
-    mVUmergeRegs(XMM6, XMM1, 4);
-    mVUmergeRegs(XMM6, XMM2, 2);
-    xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
-    xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
-    xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
-    xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
-    xPSHUF.D(xmm2, xmm2, _v0);
-    xPSHUF.D(xmm3, xmm3, _v0);
-    xPSHUF.D(xmm4, xmm4, _v0);
-    xPSHUF.D(xmm5, xmm5, _v0);
-}
-
 VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
     : v(vif_)
     , vB(vifBlock_)
@@ -291,5 +265,3 @@ _f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
     // the interpreter unpacker though, so a recursive call is the safest way here...
     dVifUnpack(idx, data, size, isFill);
 }
-
-#endif
pcsx2/x86/aVif.S | 1607 (file diff suppressed because it is too large)
pcsx2/x86/aVif.asm | 1941 (file diff suppressed because it is too large)
@@ -1,92 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include "PrecompiledHeader.h"
-
-#include "Common.h"
-#include "Vif.h"
-#include "VUmicro.h"
-
-#include <xmmintrin.h>
-#include <emmintrin.h>
-
-// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
-extern u32 g_vif1Masks[48], g_vif0Masks[48];
-extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
-
-// arranged in writearr, rowarr, colarr, updatearr
-static const __aligned16 u32 s_maskarr[16][4] = {
-    {0xffffffff, 0x00000000, 0x00000000, 0xffffffff},
-    {0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff},
-    {0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff},
-    {0xffff0000, 0x00000000, 0x00000000, 0xffff0000},
-    {0x0000ffff, 0xffff0000, 0x00000000, 0xffffffff},
-    {0x00000000, 0xffffffff, 0x00000000, 0xffffffff},
-    {0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff},
-    {0x00000000, 0xffff0000, 0x00000000, 0xffff0000},
-    {0x0000ffff, 0x00000000, 0xffff0000, 0xffffffff},
-    {0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff},
-    {0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
-    {0x00000000, 0x00000000, 0xffff0000, 0xffff0000},
-    {0x0000ffff, 0x00000000, 0x00000000, 0x0000ffff},
-    {0x00000000, 0x0000ffff, 0x00000000, 0x0000ffff},
-    {0x00000000, 0x00000000, 0x0000ffff, 0x0000ffff},
-    {0x00000000, 0x00000000, 0x00000000, 0x00000000}
-};
-
-extern u8 s_maskwrite[256];
-
-// Dear C++: Please don't mangle this name, thanks!
-extern "C" __aligned16 u32 s_TempDecompress[4];
-__aligned16 u32 s_TempDecompress[4] = {0};
-
-// Note: this function used to break regularly on Linux due to stack alignment.
-// Refer to old revisions of this code if it breaks again for workarounds.
-void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
-{
-    u32 i;
-    u32 prev = 0;
-
-    XMMRegisters::Freeze();
-    for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
-
-        prev |= s_maskwrite[mask&0xff];
-        hasmask[i] = prev;
-
-        if ((mask&0xff) != (oldmask&0xff))
-        {
-            __m128i r0, r1, r2, r3;
-            r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]); // Tends to crash Linux,
-            r2 = _mm_unpackhi_epi16(r0, r0);
-            r0 = _mm_unpacklo_epi16(r0, r0);
-
-            r1 = _mm_load_si128((__m128i*)&s_maskarr[(mask>>4)&15][0]);
-            r3 = _mm_unpackhi_epi16(r1, r1);
-            r1 = _mm_unpacklo_epi16(r1, r1);
-
-            _mm_storel_pi((__m64*)&vif1masks[0], *(__m128*)&r0);
-            _mm_storel_pi((__m64*)&vif1masks[2], *(__m128*)&r1);
-            _mm_storeh_pi((__m64*)&vif1masks[4], *(__m128*)&r0);
-            _mm_storeh_pi((__m64*)&vif1masks[6], *(__m128*)&r1);
-
-            _mm_storel_pi((__m64*)&vif1masks[8], *(__m128*)&r2);
-            _mm_storel_pi((__m64*)&vif1masks[10], *(__m128*)&r3);
-            _mm_storeh_pi((__m64*)&vif1masks[12], *(__m128*)&r2);
-            _mm_storeh_pi((__m64*)&vif1masks[14], *(__m128*)&r3);
-        }
-    }
-    XMMRegisters::Thaw();
-}
@@ -21,8 +21,6 @@
 #include "x86emitter/x86emitter.h"
 using namespace x86Emitter;
 
-#if newVif
-
 // newVif_HashBucket.h uses this typedef, so it has to be decared first.
 typedef u32 (__fastcall *nVifCall)(void*, void*);
 typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src);
@@ -104,4 +102,3 @@ extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
 static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl
 static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl
 
-#endif
@@ -24,21 +24,17 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
     VURegs * VU;
     u8 *cdata = (u8*)data;
     u32 tempsize = 0;
-    const u32 memlimit = vif_size(VIFdmanum);
+    const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000;
 
     if (VIFdmanum == 0) {
         VU = &VU0;
         vifRegs = vif0Regs;
-        vifMaskRegs = g_vif0Masks;
         vif = &vif0;
-        vifRow = g_vifmask.Row0;
     }
     else {
         VU = &VU1;
         vifRegs = vif1Regs;
-        vifMaskRegs = g_vif1Masks;
        vif = &vif1;
-        vifRow = g_vifmask.Row1;
     }
 
     u32 *dest = (u32*)(VU->Mem + v->addr);
@@ -21,8 +21,6 @@
 #include "Common.h"
 #include "VifDma_internal.h"
 #include "newVif.h"
 
-#if newVif
-
 #include "newVif_OldUnpack.inl"
 
 __aligned16 nVifStruct nVif[2];
@@ -271,4 +269,4 @@ _f void _nVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
     const bool doMode = !!vifRegs->mode;
     UnpackLoopTable[idx][doMode][isFill]( data, size );
 }
-#endif