Vif Unpacks: Fixed Issue 1325 with Non-SSE4 processors.

Put in some handling for MPG Overflows (VIF command, not videos :P)
Fixed another SSE Unpack bug I came across - affected THPS Project 8

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5392 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
refraction 2012-08-25 19:09:29 +00:00
parent f44e697743
commit ca2d9b0469
4 changed files with 50 additions and 12 deletions

View File

@ -249,12 +249,30 @@ vifOp(vifCode_Mark) {
static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) { static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
VURegs& VUx = idx ? VU1 : VU0; VURegs& VUx = idx ? VU1 : VU0;
vifStruct& vifX = GetVifX;
pxAssert(VUx.Micro > 0); pxAssert(VUx.Micro > 0);
if (idx && THREAD_VU1) { if (idx && THREAD_VU1) {
vu1Thread.WriteMicroMem(addr, (u8*)data, size*4); vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
return; return;
} }
if((addr + size *4) > (idx ? 0x4000 : 0x1000))
{
//DevCon.Warning("Handling split MPG");
if (!idx) CpuVU0->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);
else CpuVU1->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);
memcpy_fast(VUx.Micro + addr, data, (idx ? 0x4000 : 0x1000) - addr);
size -= ((idx ? 0x4000 : 0x1000) - addr) / 4;
memcpy_fast(VUx.Micro, data, size);
vifX.tag.addr = size * 4;
}
else
{
//The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it. //The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it.
//Faster without. //Faster without.
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) { //if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
@ -262,7 +280,9 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
if (!idx) CpuVU0->Clear(addr, size*4); if (!idx) CpuVU0->Clear(addr, size*4);
else CpuVU1->Clear(addr, size*4); else CpuVU1->Clear(addr, size*4);
memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast
//}
vifX.tag.addr += size * 4;
}
} }
vifOp(vifCode_MPG) { vifOp(vifCode_MPG) {
@ -282,19 +302,18 @@ vifOp(vifCode_MPG) {
pass2 { pass2 {
if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer
if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) { if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) {
DevCon.Warning("Vif%d MPG Split Overflow", idx); //DevCon.Warning("Vif%d MPG Split Overflow", idx);
} }
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize); _vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize);
vifX.tag.addr += vifX.vifpacketsize * 4; vifX.tag.size -= vifX.vifpacketsize; //We can do this first as its passed as a pointer
vifX.tag.size -= vifX.vifpacketsize;
return vifX.vifpacketsize; return vifX.vifpacketsize;
} }
else { // Full Transfer else { // Full Transfer
if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) { if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) {
DevCon.Warning("Vif%d MPG Split Overflow", idx); //DevCon.Warning("Vif%d MPG Split Overflow full %x", idx, vifX.tag.addr + vifX.tag.size*4);
} }
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size); _vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size);
int ret = vifX.tag.size; int ret = vifX.tag.size;
vifX.tag.size = 0; vifX.tag.size = 0;
vifX.cmd = 0; vifX.cmd = 0;
vifX.pass = 0; vifX.pass = 0;

View File

@ -142,6 +142,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
} }
} }
// AND masks that clear exactly one 32-bit element of a 4 x u32 (128-bit) vector.
// Row N keeps every bit set except in element (3 - N):
//   row 0 zeroes element 3, row 1 zeroes element 2,
//   row 2 zeroes element 1, row 3 zeroes element 0.
// NOTE(review): __aligned16 presumably requests 16-byte alignment so each row can
// be used directly as a 128-bit SSE memory operand -- confirm against the users.
static const __aligned16 u32 SSEXYZWMask[4][4] =
{
{0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
{0xffffffff, 0xffffffff, 0x00000000, 0xffffffff},
{0xffffffff, 0x00000000, 0xffffffff, 0xffffffff},
{0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
};
// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases) // Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
{ {
@ -206,6 +214,15 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
break; break;
} }
} }
}
else if( dest == src )
{
//VIF can send the temp directly as both the source and destination; in that case we just need to clear the ones we don't want.
if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]);
if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]);
if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]);
if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]);
} }
} }

View File

@ -160,11 +160,11 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
{ {
case 0: case 0:
case 1: case 1:
case 2: UnpkNoOfIterations = 4; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break; case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break;
case 4: case 4:
case 5: case 5:
case 6: UnpkNoOfIterations = 2; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break; case 6: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break;
case 8: break; case 8: break;
case 9: break; case 9: break;
@ -195,7 +195,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum); MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
pxAssume(vCL == 0); pxAssume(vCL == 0);
UnpkLoopIteration = 0;
// Value passed determines # of col regs we need to load // Value passed determines # of col regs we need to load
SetMasks(isFill ? blockSize : cycleSize); SetMasks(isFill ? blockSize : cycleSize);

View File

@ -46,6 +46,7 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
, workReg( xmm1 ) , workReg( xmm1 )
, destReg( xmm0 ) , destReg( xmm0 )
{ {
UnpkLoopIteration = 0;
} }
void VifUnpackSSE_Base::xMovDest() const { void VifUnpackSSE_Base::xMovDest() const {
@ -182,9 +183,10 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
} }
else else
{ {
xMOV32 (workReg, ptr32[srcIndirect]); xXOR.PD (destReg, destReg);
xPUNPCK.LWD(workReg, workReg); xMOV64 (workReg, ptr32[srcIndirect]);
xShiftR (workReg, 16); xPUNPCK.LWD(workReg, destReg);
//xShiftR (workReg, 16);
} }
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
} }