mirror of https://github.com/PCSX2/pcsx2.git
VIF: Some optimizations for the VIF Rec, some small clean-up/optimizations for VIF itself.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5368 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
68a833f4e7
commit
f19c0b7ca9
|
@ -257,12 +257,14 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) {
|
|||
vu1Thread.WriteMicroMem(addr, (u8*)data, size*4);
|
||||
return;
|
||||
}
|
||||
if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
|
||||
//The compare is pretty much a waste of time; the likelihood is that the program isn't there, which is why it's being copied.
|
||||
//Faster without.
|
||||
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) {
|
||||
// Clear VU memory before writing!
|
||||
if (!idx) CpuVU0->Clear(addr, size*4);
|
||||
else CpuVU1->Clear(addr, size*4);
|
||||
memcpy_fast(VUx.Micro + addr, data, size*4);
|
||||
}
|
||||
memcpy_aligned(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast
|
||||
//}
|
||||
}
|
||||
|
||||
vifOp(vifCode_MPG) {
|
||||
|
@ -381,14 +383,6 @@ vifOp(vifCode_Nop) {
|
|||
pass1 {
|
||||
GetVifX.cmd = 0;
|
||||
GetVifX.pass = 0;
|
||||
/*if(idx && vif1ch.chcr.STR == true)
|
||||
{
|
||||
//Some games use a huge stream of NOPS to wait for a GIF packet to start, alas the way PCSX2 works it never starts
|
||||
//So the mask can go on before the packet continues, causing desync.
|
||||
|
||||
if(((data[1] >> 24) & 0x7f) == 0x6) //Look in to the future and see if we have a mask path 3 command (NFSU)
|
||||
GetVifX.vifstalled = true; //Stall if we do to get the timing right.
|
||||
}*/
|
||||
}
|
||||
pass3 { VifCodeLog("Nop"); }
|
||||
return 1;
|
||||
|
|
|
@ -22,34 +22,12 @@
|
|||
// VifCode Transfer Interpreter (Vif0/Vif1)
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Doesn't stall if the next vifCode is the Mark command
// Tests whether the command field of vifXRegs.code (bits 24-30) equals 0x7.
// NOTE(review): both the matching and the non-matching path return 1, so the
// check currently has no effect on the result. The `data` parameter is not
// used in this body.
|
||||
_vifT bool runMark(u32* &data) {
|
||||
if (((vifXRegs.code >> 24) & 0x7f) == 0x7) {
|
||||
//DevCon.WriteLn("Vif%d: Running Mark with I-bit", idx);
|
||||
return 1; // No Stall?
|
||||
}
|
||||
return 1; // Stall
|
||||
}
|
||||
|
||||
// Returns 1 if i-bit && finished vifcode && i-bit not masked
// In detail: when iBit is non-zero, no command is pending (vifX.cmd == 0) and
// vifXRegs.err.MII is clear, vifX.irq is incremented. The function then
// returns 1, unless CHECK_VIF1STALLHACK is set, in which case it returns 0
// (the irq counter is still incremented). All other cases return 0.
// The `data` parameter is not used in this body.
|
||||
_vifT bool analyzeIbit(u32* &data, int iBit) {
|
||||
vifStruct& vifX = GetVifX;
|
||||
if (iBit && !vifX.cmd && !vifXRegs.err.MII) {
|
||||
//DevCon.WriteLn("Vif I-Bit IRQ");
|
||||
vifX.irq++;
|
||||
|
||||
if(CHECK_VIF1STALLHACK) return 0;
|
||||
else return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Interprets packet
|
||||
_vifT void vifTransferLoop(u32* &data) {
|
||||
vifStruct& vifX = GetVifX;
|
||||
|
||||
u32& pSize = vifX.vifpacketsize;
|
||||
int iBit = vifX.cmd >> 7;
|
||||
|
||||
int ret = 0;
|
||||
|
||||
vifXRegs.stat.VPS |= VPS_TRANSFERRING;
|
||||
|
@ -58,11 +36,19 @@ _vifT void vifTransferLoop(u32* &data) {
|
|||
while (pSize > 0 && !vifX.vifstalled) {
|
||||
|
||||
if(!vifX.cmd) { // Get new VifCode
|
||||
|
||||
|
||||
if(!vifXRegs.err.MII)
|
||||
{
|
||||
if(vifX.irq && !CHECK_VIF1STALLHACK)
|
||||
break;
|
||||
|
||||
vifX.irq = data[0] >> 31;
|
||||
}
|
||||
|
||||
vifXRegs.code = data[0];
|
||||
vifX.cmd = data[0] >> 24;
|
||||
iBit = data[0] >> 31;
|
||||
|
||||
|
||||
|
||||
//VIF_LOG("New VifCMD %x tagsize %x", vifX.cmd, vifX.tag.size);
|
||||
if (IsDevBuild && SysTrace.EE.VIFcode.IsActive()) {
|
||||
// Pass 2 means "log it"
|
||||
|
@ -73,10 +59,7 @@ _vifT void vifTransferLoop(u32* &data) {
|
|||
ret = vifCmdHandler[idx][vifX.cmd & 0x7f](vifX.pass, data);
|
||||
data += ret;
|
||||
pSize -= ret;
|
||||
if (analyzeIbit<idx>(data, iBit)) break;
|
||||
}
|
||||
|
||||
if (pSize) vifX.vifstalled = true;
|
||||
}
|
||||
|
||||
_vifT static __fi bool vifTransfer(u32 *data, int size, bool TTE) {
|
||||
|
|
|
@ -75,12 +75,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
|
|||
const int idx = v.idx;
|
||||
const vifStruct& vif = MTVU_VifX;
|
||||
|
||||
u32 m0 = vB.mask;
|
||||
u32 m1 = m0 & 0xaaaaaaaa;
|
||||
u32 m2 =(~m1>>1) & m0;
|
||||
u32 m3 = (m1>>1) & ~m0;
|
||||
if((m2&&doMask)||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); }
|
||||
//This could have ended up copying the row when there was no row to write.1810080
|
||||
u32 m0 = vB.mask; //The actual mask example 0x03020100
|
||||
u32 m3 = ((m0 & 0xaaaaaaaa)>>1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
|
||||
u32 m2 = (m0 & 0x55555555) & (~m0>>1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
|
||||
|
||||
if((m2&&doMask)||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); MSKPATH3_LOG("Moving row");}
|
||||
if (m3&&doMask) {
|
||||
MSKPATH3_LOG("Merging Cols");
|
||||
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
|
||||
if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
||||
if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
||||
|
@ -92,33 +94,37 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
|
|||
|
||||
void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
|
||||
pxAssertDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
|
||||
xRegisterSSE t = regX == xmm0 ? xmm1 : xmm0; // Get Temp Reg
|
||||
|
||||
int cc = aMin(vCL, 3);
|
||||
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
|
||||
u32 m1 = m0 & 0xaa;
|
||||
u32 m2 =(~m1>>1) & m0;
|
||||
u32 m3 = (m1>>1) & ~m0;
|
||||
u32 m4 = (m1>>1) & m0;
|
||||
u32 m0 = (vB.mask >> (cc * 8)) & 0xff; //The actual mask example 0xE4 (protect, col, row, clear)
|
||||
u32 m3 = ((m0 & 0xaa)>>1) & ~m0; //all the upper bits (cols shifted right) cancelling out any write protects 0x10
|
||||
u32 m2 = (m0 & 0x55) & (~m0>>1); // all the lower bits (rows)cancelling out any write protects 0x04
|
||||
u32 m4 = (m0 & ~((m3<<1) | m2)) & 0x55; // = 0xC0 & 0x55 = 0x40 (for merge mask)
|
||||
|
||||
makeMergeMask(m2);
|
||||
makeMergeMask(m3);
|
||||
makeMergeMask(m4);
|
||||
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
|
||||
if (doMask&&m2) { mergeVectors(regX, xmmRow, t, m2); } // Merge MaskRow
|
||||
if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), t, m3); } // Merge MaskCol
|
||||
if (doMask&&m4) { mergeVectors(regX, xmmTemp, t, m4); } // Merge Write Protect
|
||||
|
||||
if (doMask&&m2) { mergeVectors(regX, xmmRow, xmmTemp, m2); } // Merge MaskRow
|
||||
if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), xmmTemp, m3); } // Merge MaskCol
|
||||
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]);
|
||||
mergeVectors(regX, xmmTemp, xmmTemp, m4); } // Merge Write Protect
|
||||
if (doMode) {
|
||||
u32 m5 = (~m1>>1) & ~m0;
|
||||
u32 m5 = ~(m2|m3|m4) & 0xf;
|
||||
|
||||
if (!doMask) m5 = 0xf;
|
||||
else makeMergeMask(m5);
|
||||
if (m5 < 0xf) {
|
||||
|
||||
if (m5 < 0xf)
|
||||
{
|
||||
xPXOR(xmmTemp, xmmTemp);
|
||||
mergeVectors(xmmTemp, xmmRow, t, m5);
|
||||
mergeVectors(xmmTemp, xmmRow, xmmTemp, m5);
|
||||
xPADD.D(regX, xmmTemp);
|
||||
if (doMode==2) mergeVectors(xmmRow, regX, t, m5);
|
||||
if (doMode==2) mergeVectors(xmmRow, regX, xmmTemp, m5);
|
||||
}
|
||||
else if (m5 == 0xf) {
|
||||
else
|
||||
{
|
||||
xPADD.D(regX, xmmRow);
|
||||
if (doMode==2) xMOVAPS(xmmRow, regX);
|
||||
if (doMode==2){ xMOVAPS(xmmRow, regX); }
|
||||
}
|
||||
}
|
||||
xMOVAPS(ptr32[dstIndirect], regX);
|
||||
|
@ -127,6 +133,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
|
|||
// Writes the xmmRow register back to MTVU_VifX.MaskRow using xMOVAPS and
// logs a message to DevCon.
// `idx` is not referenced by name in this body; presumably the MTVU_VifX
// macro needs it in scope -- TODO confirm.
// NOTE(review): if xMOVAPS only emits an instruction into the generated
// block, the DevCon message is printed when the block is compiled, not each
// time the generated code runs -- confirm this is the intended logging point.
void VifUnpackSSE_Dynarec::writeBackRow() const {
|
||||
const int idx = v.idx;
|
||||
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
|
||||

||||
DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
|
||||
// ToDo: Do we need to write back to vifregs.rX too!? :/
|
||||
}
|
||||
|
@ -143,9 +150,39 @@ static void ShiftDisplacementWindow( xAddressVoid& addr, const xRegister32& modR
|
|||
addImm += 0xf0;
|
||||
addr -= 0xf0;
|
||||
}
|
||||
if(addImm) xADD(modReg, addImm);
|
||||
if(addImm) { xADD(modReg, addImm); }
|
||||
}
|
||||
|
||||
// Updates the per-unpack iteration bookkeeping (UnpkNoOfIterations and
// UnpkLoopIteration) for the unpack type `upknum`.
//   upknum 0-2 : UnpkNoOfIterations = 4
//   upknum 4-6 : UnpkNoOfIterations = 2
//   upknum 8-10, 12-15 : no change to either counter
//   upknum 3, 7, 11    : reported as invalid via pxFailRel
// When PostOp is true (cases 0-2 and 4-6 only), UnpkLoopIteration is advanced
// by one and wrapped modulo UnpkNoOfIterations; when false only the iteration
// count is set.
// NOTE(review): cases 8-15 leave UnpkNoOfIterations at whatever value it
// already held; this function never resets it to 0 -- confirm callers that
// test it against 0 initialise it elsewhere.
void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
|
||||
{
|
||||
|
||||
switch( upknum )
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2: UnpkNoOfIterations = 4; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
|
||||
|
||||
case 4:
|
||||
case 5:
|
||||
case 6: UnpkNoOfIterations = 2; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break;
|
||||
|
||||
case 8: break;
|
||||
case 9: break;
|
||||
case 10: break;
|
||||
|
||||
case 12: break;
|
||||
case 13: break;
|
||||
case 14: break;
|
||||
case 15: break;
|
||||
|
||||
case 3:
|
||||
case 7:
|
||||
case 11:
|
||||
pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
void VifUnpackSSE_Dynarec::CompileRoutine() {
|
||||
const int upkNum = vB.upkType & 0xf;
|
||||
const u8& vift = nVifT[upkNum];
|
||||
|
@ -155,29 +192,32 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
|
|||
|
||||
uint vNum = vB.num ? vB.num : 256;
|
||||
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
|
||||
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
||||
|
||||
pxAssume(vCL == 0);
|
||||
|
||||
UnpkLoopIteration = 0;
|
||||
// Value passed determines # of col regs we need to load
|
||||
SetMasks(isFill ? blockSize : cycleSize);
|
||||
|
||||
while (vNum) {
|
||||
|
||||
ShiftDisplacementWindow( srcIndirect, edx );
|
||||
|
||||
ShiftDisplacementWindow( dstIndirect, ecx );
|
||||
|
||||
if(UnpkNoOfIterations == 0)
|
||||
ShiftDisplacementWindow( srcIndirect, edx ); //Don't need to do this otherwise as we arent reading the source.
|
||||
|
||||
|
||||
if (vCL < cycleSize) {
|
||||
ModUnpack(upkNum, false);
|
||||
xUnpack(upkNum);
|
||||
xMovDest();
|
||||
ModUnpack(upkNum, true);
|
||||
|
||||
|
||||
dstIndirect += 16;
|
||||
srcIndirect += vift;
|
||||
|
||||
if( IsUnmaskedOp() ) {
|
||||
++destReg;
|
||||
++workReg;
|
||||
}
|
||||
|
||||
vNum--;
|
||||
if (++vCL == blockSize) vCL = 0;
|
||||
}
|
||||
|
@ -189,11 +229,6 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
|
|||
|
||||
dstIndirect += 16;
|
||||
|
||||
if( IsUnmaskedOp() ) {
|
||||
++destReg;
|
||||
++workReg;
|
||||
}
|
||||
|
||||
vNum--;
|
||||
if (++vCL == blockSize) vCL = 0;
|
||||
}
|
||||
|
@ -256,7 +291,7 @@ _vifT static __ri bool dVifExecuteUnpack(const u8* data, bool isFill)
|
|||
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
|
||||
}
|
||||
else {
|
||||
//DevCon.WriteLn("Running Interpreter Block");
|
||||
DevCon.WriteLn("Running Interpreter Block");
|
||||
_nVifUnpack(idx, data, vifRegs.mode, isFill);
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -117,7 +117,7 @@ _vifT int nVifUnpack(const u8* data) {
|
|||
|
||||
if (ret == vif.tag.size) { // Full Transfer
|
||||
if (v.bSize) { // Last transfer was partial
|
||||
memcpy_fast(&v.buffer[v.bSize], data, size);
|
||||
memcpy_aligned(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
size = v.bSize;
|
||||
data = v.buffer;
|
||||
|
@ -140,7 +140,7 @@ _vifT int nVifUnpack(const u8* data) {
|
|||
v.bSize = 0;
|
||||
}
|
||||
else { // Partial Transfer
|
||||
memcpy_fast(&v.buffer[v.bSize], data, size);
|
||||
memcpy_aligned(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
vif.tag.size -= ret;
|
||||
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
//static __pagealigned u8 nVifUpkExec[__pagesize*4];
|
||||
static RecompiledCodeReserve* nVifUpkExec = NULL;
|
||||
|
||||
|
||||
// Merges xmm vectors without modifying source reg
|
||||
void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
|
||||
|
@ -33,7 +32,7 @@ void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xy
|
|||
mVUmergeRegs(dest, src, xyzw);
|
||||
}
|
||||
else {
|
||||
xMOVAPS(temp, src);
|
||||
if(temp != src) xMOVAPS(temp, src); //Sometimes we don't care if the source is modified and is temp reg.
|
||||
mVUmergeRegs(dest, temp, xyzw);
|
||||
}
|
||||
}
|
||||
|
@ -61,7 +60,7 @@ void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
|
|||
|
||||
void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
|
||||
if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
|
||||
else xPMOVSX.BD(regX, ptr32[srcIndirect]);
|
||||
else xPMOVSX.BD(regX, ptr32[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
|
||||
|
@ -70,37 +69,87 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_32() const {
|
||||
xMOV32 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
|
||||
switch(UnpkLoopIteration)
|
||||
{
|
||||
case 0:
|
||||
xMOV128 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
break;
|
||||
case 1:
|
||||
xPSHUF.D (destReg, workReg, _v1);
|
||||
break;
|
||||
case 2:
|
||||
xPSHUF.D (destReg, workReg, _v2);
|
||||
break;
|
||||
case 3:
|
||||
xPSHUF.D (destReg, workReg, _v3);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
|
||||
if (!x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (workReg);
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
|
||||
switch(UnpkLoopIteration)
|
||||
{
|
||||
case 0:
|
||||
xPMOVXX16 (workReg);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
break;
|
||||
case 1:
|
||||
xPSHUF.D (destReg, workReg, _v1);
|
||||
break;
|
||||
case 2:
|
||||
xPSHUF.D (destReg, workReg, _v2);
|
||||
break;
|
||||
case 3:
|
||||
xPSHUF.D (destReg, workReg, _v3);
|
||||
break;
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (workReg);
|
||||
}
|
||||
else
|
||||
|
||||
if (!x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xMOV8 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 24);
|
||||
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
return;
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
|
||||
switch(UnpkLoopIteration)
|
||||
{
|
||||
case 0:
|
||||
xPMOVXX8 (workReg);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
break;
|
||||
case 1:
|
||||
xPSHUF.D (destReg, workReg, _v1);
|
||||
break;
|
||||
case 2:
|
||||
xPSHUF.D (destReg, workReg, _v2);
|
||||
break;
|
||||
case 3:
|
||||
xPSHUF.D (destReg, workReg, _v3);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// The V2 + V3 unpacks have freaky behaviour, the manual claims "indeterminate".
|
||||
|
@ -109,44 +158,75 @@ void VifUnpackSSE_Base::xUPK_S_8() const {
|
|||
// I have commented after each shuffle to show what data is going where - Ref
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_32() const {
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
|
||||
|
||||
if(UnpkLoopIteration == 0)
|
||||
{
|
||||
xMOV128 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||
}
|
||||
else
|
||||
{
|
||||
xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
|
||||
if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (workReg);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV32 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2
|
||||
}
|
||||
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
|
||||
|
||||
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
|
||||
if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (workReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 24);
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
xMOV16 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2
|
||||
}
|
||||
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
|
||||
|
||||
}
|
||||
|
||||
// V3-32 unpack: a single xMOV128 from the address in srcIndirect into destReg.
// Unlike the S and V2 variants in this file, it does not consult
// UnpkLoopIteration and does not use workReg.
void VifUnpackSSE_Base::xUPK_V3_32() const {
|
||||
|
||||
xMOV128 (destReg, ptr128[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_16() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
|
@ -160,6 +240,7 @@ void VifUnpackSSE_Base::xUPK_V3_16() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_8() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
|
@ -174,10 +255,12 @@ void VifUnpackSSE_Base::xUPK_V3_8() const {
|
|||
}
|
||||
|
||||
// V4-32 unpack: a single xMOV128 from the address in srcIndirect into destReg.
// NOTE(review): the operand is tagged ptr32 here while xUPK_V3_32 uses ptr128
// with the same xMOV128 call -- presumably the size tag does not matter for
// this instruction; confirm.
void VifUnpackSSE_Base::xUPK_V4_32() const {
|
||||
|
||||
xMOV128 (destReg, ptr32[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_16() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
|
@ -191,6 +274,7 @@ void VifUnpackSSE_Base::xUPK_V4_16() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_8() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
|
@ -205,6 +289,7 @@ void VifUnpackSSE_Base::xUPK_V4_8() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_5() const {
|
||||
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (workReg, workReg, _v0);
|
||||
xPSLL.D (workReg, 3); // ABG|R5.000
|
||||
|
|
|
@ -34,12 +34,15 @@ class VifUnpackSSE_Base
|
|||
public:
|
||||
bool usn; // unsigned flag
|
||||
bool doMask; // masking write enable flag
|
||||
int UnpkLoopIteration;
|
||||
int UnpkNoOfIterations;
|
||||
|
||||
|
||||
protected:
|
||||
xAddressVoid dstIndirect;
|
||||
xAddressVoid srcIndirect;
|
||||
xRegisterSSE workReg;
|
||||
xRegisterSSE destReg;
|
||||
xRegisterSSE destReg;
|
||||
|
||||
public:
|
||||
VifUnpackSSE_Base();
|
||||
|
@ -105,7 +108,7 @@ class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
|
|||
public:
|
||||
bool isFill;
|
||||
int doMode; // two bit value representing... something!
|
||||
|
||||
|
||||
protected:
|
||||
const nVifStruct& v; // vif0 or vif1
|
||||
const nVifBlock& vB; // some pre-collected data from VifStruct
|
||||
|
@ -126,7 +129,9 @@ public:
|
|||
|
||||
virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; }
|
||||
|
||||
void ModUnpack( int upknum, bool PostOp );
|
||||
void CompileRoutine();
|
||||
|
||||
|
||||
protected:
|
||||
virtual void doMaskWrite(const xRegisterSSE& regX) const;
|
||||
|
|
Loading…
Reference in New Issue