mirror of https://github.com/PCSX2/pcsx2.git
Vif Unpacks: Fixed Issue 1325 with non-SSE4 processors.
Put in some handling for MPG overflows (the VIF command, not videos :P). Fixed another SSE unpack bug I came across - affected THPS Project 8.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5392 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent  f44e697743
commit  ca2d9b0469
@@ -249,12 +249,30 @@ vifOp(vifCode_Mark) {
static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
    VURegs&    VUx  = idx ? VU1 : VU0;
    vifStruct& vifX = GetVifX;
    pxAssert(VUx.Micro > 0);

    if (idx && THREAD_VU1) {
        vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
        return;
    }

    if((addr + size *4) > (idx ? 0x4000 : 0x1000))
    {
        //DevCon.Warning("Handling split MPG");
        if (!idx) CpuVU0->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);
        else      CpuVU1->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);

        memcpy_fast(VUx.Micro + addr, data, (idx ? 0x4000 : 0x1000) - addr);
        size -= ((idx ? 0x4000 : 0x1000) - addr) / 4;
        memcpy_fast(VUx.Micro, data, size);

        vifX.tag.addr = size * 4;
    }
    else
    {
        //The compare is pretty much a waste of time; the likelihood is that the program isn't there, which is why it's copying it.
        //Faster without.
        //if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
@@ -262,7 +280,9 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
        if (!idx) CpuVU0->Clear(addr, size*4);
        else      CpuVU1->Clear(addr, size*4);
        memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast
        //}

        vifX.tag.addr += size * 4;
    }
}

vifOp(vifCode_MPG) {
@@ -282,19 +302,18 @@ vifOp(vifCode_MPG) {
    pass2 {
        if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer
            if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) {
                DevCon.Warning("Vif%d MPG Split Overflow", idx);
                //DevCon.Warning("Vif%d MPG Split Overflow", idx);
            }
            _vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize);
            vifX.tag.addr += vifX.vifpacketsize * 4;
            vifX.tag.size -= vifX.vifpacketsize;
            vifX.tag.size -= vifX.vifpacketsize; //We can do this first as it's passed as a pointer
            return vifX.vifpacketsize;
        }
        else { // Full Transfer
            if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) {
                DevCon.Warning("Vif%d MPG Split Overflow", idx);
                //DevCon.Warning("Vif%d MPG Split Overflow full %x", idx, vifX.tag.addr + vifX.tag.size*4);
            }
            _vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size);
            int ret = vifX.tag.size;
            int ret = vifX.tag.size;
            vifX.tag.size = 0;
            vifX.cmd  = 0;
            vifX.pass = 0;
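The split-MPG path above wraps a micro-program upload that would run past the end of VU micro memory (0x4000 bytes for VU1, 0x1000 for VU0) back around to address 0. Below is a minimal standalone sketch of that wrap-around idea; microMem, MICRO_SIZE and writeMicroProgram are illustrative names, not PCSX2 APIs, and the sketch advances the source pointer for the wrapped half, so it illustrates the general technique rather than reproducing the diff line for line.

// A minimal sketch of a wrap-around micro-program upload (illustrative names).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

static const size_t MICRO_SIZE = 0x4000; // VU1 micro memory size; VU0 would be 0x1000

static void writeMicroProgram(uint8_t* microMem, size_t addr,
                              const uint8_t* data, size_t sizeBytes)
{
    if (addr + sizeBytes > MICRO_SIZE) {
        // Split transfer: fill up to the top of micro memory, then wrap
        // the remainder back to address 0.
        size_t firstPart = MICRO_SIZE - addr;
        std::memcpy(microMem + addr, data, firstPart);
        std::memcpy(microMem, data + firstPart, sizeBytes - firstPart);
    } else {
        std::memcpy(microMem + addr, data, sizeBytes);
    }
}

int main()
{
    std::vector<uint8_t> microMem(MICRO_SIZE, 0);
    std::vector<uint8_t> program(0x100, 0xAB);

    // Start 0x80 bytes below the top so half the program wraps to address 0.
    writeMicroProgram(microMem.data(), MICRO_SIZE - 0x80, program.data(), program.size());

    std::printf("last byte = %02x, first byte = %02x\n",
                microMem[MICRO_SIZE - 1], microMem[0]);
    return 0;
}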
@@ -142,6 +142,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
    }
}

static const __aligned16 u32 SSEXYZWMask[4][4] =
{
    {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
    {0xffffffff, 0xffffffff, 0x00000000, 0xffffffff},
    {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff},
    {0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
};

// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
{
@@ -207,6 +215,15 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
            }
        }
    }
    else if( dest == src )
    {
        //VIF can pass the same temporary register as both source and destination; in that case, just clear the elements we don't want.
        if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]);
        if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]);
        if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]);
        if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]);

    }
}

//------------------------------------------------------------------
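The dest == src branch above is the non-SSE4 fallback: with no single blend instruction available, the unwanted XYZW lanes are simply ANDed away using the SSEXYZWMask table added earlier in this commit. Here is a small sketch of the same lane-clearing trick with plain SSE2 intrinsics; kLaneMask mirrors the table above, while keepSelectedLanes and the surrounding scaffolding are illustrative names, not PCSX2 code.

// Lane-clearing fallback sketch (plain SSE2 intrinsics, illustrative names).
#include <emmintrin.h>
#include <cstdio>

// kLaneMask[0] clears W, [1] clears Z, [2] clears Y, [3] clears X
// (memory lane order is X, Y, Z, W).
alignas(16) static const unsigned int kLaneMask[4][4] = {
    {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
    {0xffffffff, 0xffffffff, 0x00000000, 0xffffffff},
    {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff},
    {0x00000000, 0xffffffff, 0xffffffff, 0xffffffff},
};

static __m128 loadMask(int i)
{
    return _mm_castsi128_ps(_mm_load_si128((const __m128i*)kLaneMask[i]));
}

// xyzw uses the same bit-per-lane meaning as the merge code above:
// bit 0x8 keeps X, 0x4 keeps Y, 0x2 keeps Z, 0x1 keeps W.
static __m128 keepSelectedLanes(__m128 v, int xyzw)
{
    if (!(xyzw & 0x1)) v = _mm_and_ps(v, loadMask(0));
    if (!(xyzw & 0x2)) v = _mm_and_ps(v, loadMask(1));
    if (!(xyzw & 0x4)) v = _mm_and_ps(v, loadMask(2));
    if (!(xyzw & 0x8)) v = _mm_and_ps(v, loadMask(3));
    return v;
}

int main()
{
    alignas(16) float in[4]  = {1.0f, 2.0f, 3.0f, 4.0f};
    alignas(16) float out[4];
    _mm_store_ps(out, keepSelectedLanes(_mm_load_ps(in), 0x9)); // keep X and W
    std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 1 0 0 4
    return 0;
}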
@@ -160,11 +160,11 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
    {
        case 0:
        case 1:
        case 2: UnpkNoOfIterations = 4; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
        case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break;

        case 4:
        case 5:
        case 6: UnpkNoOfIterations = 2; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
        case 6: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break;

        case 8: break;
        case 9: break;
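The ModUnpack change above swaps a modulo by the iteration count for a bitwise AND: both periods (4 and 2) are powers of two, and for a power-of-two period N, x % N equals x & (N - 1) for non-negative x, which removes the dependence on UnpkNoOfIterations. A tiny standalone check of that identity:

// Standalone check: x % N == x & (N - 1) for power-of-two N and non-negative x.
#include <cassert>

int main()
{
    for (unsigned x = 0; x < 64; ++x) {
        assert((x % 4) == (x & 0x3)); // the case 0/1/2 path (period 4)
        assert((x % 2) == (x & 0x1)); // the case 4/5/6 path (period 2)
    }
    return 0;
}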
@@ -195,7 +195,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
    MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);

    pxAssume(vCL == 0);
    UnpkLoopIteration = 0;

    // Value passed determines # of col regs we need to load
    SetMasks(isFill ? blockSize : cycleSize);
@@ -46,6 +46,7 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
    , workReg( xmm1 )
    , destReg( xmm0 )
{
    UnpkLoopIteration = 0;
}

void VifUnpackSSE_Base::xMovDest() const {
@@ -182,9 +183,10 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
    }
    else
    {
        xMOV32     (workReg, ptr32[srcIndirect]);
        xPUNPCK.LWD(workReg, workReg);
        xShiftR    (workReg, 16);
        xXOR.PD    (destReg, destReg);
        xMOV64     (workReg, ptr32[srcIndirect]);
        xPUNPCK.LWD(workReg, destReg);
        //xShiftR (workReg, 16);
    }
    xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
}
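The rewritten non-SSE4 V2-16 path above zeroes destReg, loads 64 bits, interleaves the low 16-bit words with that zero register, and then PSHUFD 0x44 duplicates the low quadword into the v1v0v1v0 layout. Below is a standalone sketch of what that sequence computes for the unsigned (zero-extended) case, written with plain SSE2 intrinsics rather than PCSX2's emitter; the variable names are illustrative.

// Sketch of the non-SSE4 V2-16 unpack result for the unsigned case.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main()
{
    // Only the first two 16-bit values matter; the load below reads 64 bits,
    // just like xMOV64 in the emitted code.
    const uint16_t src[4] = {0x1234, 0xABCD, 0, 0}; // v0, v1

    __m128i zero = _mm_setzero_si128();                  // xXOR.PD  destReg, destReg
    __m128i work = _mm_loadl_epi64((const __m128i*)src); // xMOV64   workReg, [src]
    work = _mm_unpacklo_epi16(work, zero);               // xPUNPCK.LWD workReg, destReg -> zero-extend v0, v1 to 32 bits
    __m128i dest = _mm_shuffle_epi32(work, 0x44);        // xPSHUF.D destReg, workReg, 0x44 -> v1v0v1v0

    alignas(16) uint32_t out[4];
    _mm_store_si128((__m128i*)out, dest);
    std::printf("%08x %08x %08x %08x\n", out[0], out[1], out[2], out[3]);
    // prints: 00001234 0000abcd 00001234 0000abcd
    return 0;
}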