mirror of https://github.com/PCSX2/pcsx2.git
Vif Unpacks: Fixed Issue 1325 with Non-SSE4 processors.
Put in some handling for MPG overflows (the VIF command, not videos :P). Fixed another SSE unpack bug I came across, which affected THPS Project 8. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5392 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
f44e697743
commit
ca2d9b0469
|
@ -249,12 +249,30 @@ vifOp(vifCode_Mark) {
|
||||||
|
|
||||||
static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
|
static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
|
||||||
VURegs& VUx = idx ? VU1 : VU0;
|
VURegs& VUx = idx ? VU1 : VU0;
|
||||||
|
vifStruct& vifX = GetVifX;
|
||||||
pxAssert(VUx.Micro > 0);
|
pxAssert(VUx.Micro > 0);
|
||||||
|
|
||||||
if (idx && THREAD_VU1) {
|
if (idx && THREAD_VU1) {
|
||||||
vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
|
vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if((addr + size *4) > (idx ? 0x4000 : 0x1000))
|
||||||
|
{
|
||||||
|
//DevCon.Warning("Handling split MPG");
|
||||||
|
if (!idx) CpuVU0->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);
|
||||||
|
else CpuVU1->Clear(addr, (idx ? 0x4000 : 0x1000) - addr);
|
||||||
|
|
||||||
|
memcpy_fast(VUx.Micro + addr, data, (idx ? 0x4000 : 0x1000) - addr);
|
||||||
|
size -= ((idx ? 0x4000 : 0x1000) - addr) / 4;
|
||||||
|
memcpy_fast(VUx.Micro, data, size);
|
||||||
|
|
||||||
|
vifX.tag.addr = size * 4;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
//The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it.
|
//The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it.
|
||||||
//Faster without.
|
//Faster without.
|
||||||
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
|
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
|
||||||
|
@ -262,7 +280,9 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
|
||||||
if (!idx) CpuVU0->Clear(addr, size*4);
|
if (!idx) CpuVU0->Clear(addr, size*4);
|
||||||
else CpuVU1->Clear(addr, size*4);
|
else CpuVU1->Clear(addr, size*4);
|
||||||
memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast
|
memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast
|
||||||
//}
|
|
||||||
|
vifX.tag.addr += size * 4;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vifOp(vifCode_MPG) {
|
vifOp(vifCode_MPG) {
|
||||||
|
@ -282,19 +302,18 @@ vifOp(vifCode_MPG) {
|
||||||
pass2 {
|
pass2 {
|
||||||
if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer
|
if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer
|
||||||
if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) {
|
if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) {
|
||||||
DevCon.Warning("Vif%d MPG Split Overflow", idx);
|
//DevCon.Warning("Vif%d MPG Split Overflow", idx);
|
||||||
}
|
}
|
||||||
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize);
|
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize);
|
||||||
vifX.tag.addr += vifX.vifpacketsize * 4;
|
vifX.tag.size -= vifX.vifpacketsize; //We can do this first as its passed as a pointer
|
||||||
vifX.tag.size -= vifX.vifpacketsize;
|
|
||||||
return vifX.vifpacketsize;
|
return vifX.vifpacketsize;
|
||||||
}
|
}
|
||||||
else { // Full Transfer
|
else { // Full Transfer
|
||||||
if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) {
|
if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) {
|
||||||
DevCon.Warning("Vif%d MPG Split Overflow", idx);
|
//DevCon.Warning("Vif%d MPG Split Overflow full %x", idx, vifX.tag.addr + vifX.tag.size*4);
|
||||||
}
|
}
|
||||||
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size);
|
_vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size);
|
||||||
int ret = vifX.tag.size;
|
int ret = vifX.tag.size;
|
||||||
vifX.tag.size = 0;
|
vifX.tag.size = 0;
|
||||||
vifX.cmd = 0;
|
vifX.cmd = 0;
|
||||||
vifX.pass = 0;
|
vifX.pass = 0;
|
||||||
|
|
|
@ -142,6 +142,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const __aligned16 u32 SSEXYZWMask[4][4] =
|
||||||
|
{
|
||||||
|
{0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
|
||||||
|
{0xffffffff, 0xffffffff, 0x00000000, 0xffffffff},
|
||||||
|
{0xffffffff, 0x00000000, 0xffffffff, 0xffffffff},
|
||||||
|
{0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
|
||||||
|
};
|
||||||
|
|
||||||
// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
|
// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
|
||||||
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
||||||
{
|
{
|
||||||
|
@ -206,6 +214,15 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else if( dest == src )
|
||||||
|
{
|
||||||
|
//VIF can sent the temp directory as the source and destination, just need to clear the ones we dont want in which case.
|
||||||
|
if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]);
|
||||||
|
if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]);
|
||||||
|
if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]);
|
||||||
|
if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -160,11 +160,11 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
case 2: UnpkNoOfIterations = 4; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
|
case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break;
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
case 5:
|
case 5:
|
||||||
case 6: UnpkNoOfIterations = 2; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
|
case 6: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break;
|
||||||
|
|
||||||
case 8: break;
|
case 8: break;
|
||||||
case 9: break;
|
case 9: break;
|
||||||
|
@ -195,7 +195,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
|
||||||
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
||||||
|
|
||||||
pxAssume(vCL == 0);
|
pxAssume(vCL == 0);
|
||||||
UnpkLoopIteration = 0;
|
|
||||||
// Value passed determines # of col regs we need to load
|
// Value passed determines # of col regs we need to load
|
||||||
SetMasks(isFill ? blockSize : cycleSize);
|
SetMasks(isFill ? blockSize : cycleSize);
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,7 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
|
||||||
, workReg( xmm1 )
|
, workReg( xmm1 )
|
||||||
, destReg( xmm0 )
|
, destReg( xmm0 )
|
||||||
{
|
{
|
||||||
|
UnpkLoopIteration = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xMovDest() const {
|
void VifUnpackSSE_Base::xMovDest() const {
|
||||||
|
@ -182,9 +183,10 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xMOV32 (workReg, ptr32[srcIndirect]);
|
xXOR.PD (destReg, destReg);
|
||||||
xPUNPCK.LWD(workReg, workReg);
|
xMOV64 (workReg, ptr32[srcIndirect]);
|
||||||
xShiftR (workReg, 16);
|
xPUNPCK.LWD(workReg, destReg);
|
||||||
|
//xShiftR (workReg, 16);
|
||||||
}
|
}
|
||||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue