-implemented the ability to run the VUs for a set number of cycles, instead of running until the microprogram completes (some games can get stuck in infinite loops, so this is needed)
-fixed some errors...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1023 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-04-19 21:22:47 +00:00
parent a2d305b9ab
commit b7ea57a5d9
9 changed files with 91 additions and 59 deletions
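
The idea of the change, as a minimal host-side sketch (every name below is a hypothetical stand-in; the real implementation emits an equivalent budget check into each recompiled block via mVUtestCycles, shown further down):

#include <cstdio>

// Hypothetical stand-ins for the state this commit adds (cycles/totalCycles).
struct VUState {
    int  cycles;      // remaining cycle budget, counted down per block
    int  totalCycles; // budget handed in by the caller
    bool ended;       // set when the microprogram reaches its E-bit end
};

// Stub standing in for one recompiled block; returns that block's cycle cost.
static int executeNextBlock(VUState& vu) { vu.ended = true; return 4; }

static void runMicroProgram(VUState& vu, int budget) {
    vu.cycles = vu.totalCycles = budget;
    while (!vu.ended && vu.cycles > 0)   // the cap is what breaks infinite loops
        vu.cycles -= executeNextBlock(vu);
}

int main() {
    VUState vu = {};
    runMicroProgram(vu, 0x20000);        // cap comparable to the new VU0 call below
    std::printf("cycles left: %d\n", vu.cycles);
    return 0;
}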

View File

@@ -84,7 +84,7 @@ namespace VU0micro
 FreezeXMMRegs(1);
 FreezeMMXRegs(1);
-runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0);
+runVUrec(VU0.VI[REG_TPC].UL, 0x20000, 0);
 FreezeXMMRegs(0);
 FreezeMMXRegs(0);
 }

View File

@@ -142,8 +142,8 @@ namespace VU1micro
 assert( (VU1.VI[REG_TPC].UL&7) == 0 );
 FreezeXMMRegs(1);
-FreezeMMXRegs(0);
-runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1);
+FreezeMMXRegs(1);
+runVUrec(VU1.VI[REG_TPC].UL, 20000, 1);
 FreezeXMMRegs(0);
 FreezeMMXRegs(0);
 }

View File

@@ -165,7 +165,6 @@ __forceinline int mVUsearchProg(microVU* mVU) {
 if (mVU->prog.cleared) { // If cleared, we need to search for new program
 for (int i = 0; i <= mVU->prog.total; i++) {
 //if (i == mVU->prog.cur) continue; // We can skip the current program. (ToDo: Verify that games don't clear, and send the same microprogram :/)
-//if (mVU->prog.prog[i]) // ToDo: Implement Cycles
 if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
 //if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); }
 mVU->prog.cur = i;

View File

@@ -17,7 +17,7 @@
 */
 #pragma once
-#define mVUdebug // Prints Extra Info to Console
+//#define mVUdebug // Prints Extra Info to Console
 #include "Common.h"
 #include "VU.h"
 #include "GS.h"
@@ -105,7 +105,9 @@ struct microVU {
 u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR)
 u32 p; // Holds current P instance index
 u32 q; // Holds current Q instance index
-u32 tempBackup;
+u32 espBackup; // Temp Backup for ESP
+u32 totalCycles;
+u32 cycles;
 };
 // microVU rec structs

View File

@@ -56,7 +56,7 @@ microVUt(void) mVUallocFMAC1b(int& Fd) {
 microVU* mVU = mVUx;
 if (!_Fd_) return;
 if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fd, xmmT1, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 1);
 }
 //------------------------------------------------------------------
@@ -74,7 +74,7 @@ microVUt(void) mVUallocFMAC2b(int& Ft) {
 microVU* mVU = mVUx;
 if (!_Ft_) { SysPrintf("microVU: If a game does this, its retarded...\n"); return; }
 //if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Ft, xmmT1, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 //------------------------------------------------------------------
@@ -201,10 +201,10 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
 // FMAC6 - Normal FMAC Opcodes (I Reg)
 //------------------------------------------------------------------
-#define getIreg(reg, modXYZW) { \
-MOV32ItoR(gprT1, mVU->iReg); \
-SSE2_MOVD_R_to_XMM(reg, gprT1); \
-if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
+#define getIreg(reg, modXYZW) { \
+MOV32MtoR(gprT1, (uptr)&mVU->regs->VI[REG_I].UL); \
+SSE2_MOVD_R_to_XMM(reg, gprT1); \
+if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
 if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
 }
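
For context on the getIreg change above, a rough before/after sketch (stand-in names; the rationale is an inference): the old macro baked the compile-time cached mVU->iReg value into the block as an immediate, while the new one reloads VI[REG_I] from memory at run time, which presumably keeps the I register coherent now that a program can be suspended mid-run by the cycle cap.

struct RegsSketch { unsigned VI_REG_I; };  // hypothetical stand-in for VURegs

// Before: value frozen into the block as an immediate when it was compiled.
static unsigned loadIregOld(unsigned compiledImmediate) { return compiledImmediate; }

// After: the second pass stores the I-bit immediate to VI[REG_I] (MOV32ItoM)
// and blocks reload it from there (MOV32MtoR), so it survives early exits.
static unsigned loadIregNew(const RegsSketch& r) { return r.VI_REG_I; }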
@@ -269,7 +269,7 @@ microVUt(void) mVUallocFMAC8b(int& Fd) {
 microVU* mVU = mVUx;
 if (!_Fd_) return;
 if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fd, xmmT1, _xyzw_ACC);
-mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 0);
 }
 //------------------------------------------------------------------
@@ -302,7 +302,7 @@ microVUt(void) mVUallocFMAC9b(int& Fd) {
 microVU* mVU = mVUx;
 if (!_Fd_) return;
 if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fd, xmmFt, _xyzw_ACC);
-mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 0);
 }
 //------------------------------------------------------------------

View File

@@ -145,7 +145,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
 mVUlog("mVUsetupBranch");
 PUSH32R(gprR); // Backup gprR
-MOV32RtoM((uptr)&mVU->tempBackup, gprESP);
+MOV32RtoM((uptr)&mVU->espBackup, gprESP);
 MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
 MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
@@ -172,7 +172,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
 OR32RtoR(gprF2, getFlagReg2(bMac[2]));
 OR32RtoR(gprF3, getFlagReg2(bMac[3]));
-MOV32MtoR(gprESP, (uptr)&mVU->tempBackup);
+MOV32MtoR(gprESP, (uptr)&mVU->espBackup);
 POP32R(gprR); // Restore gprR
 // Shuffle P/Q regs since every block starts at instance #0
@@ -236,6 +236,30 @@ microVUt(void) mVUdivSet() {
 }
 }
+microVUt(void) mVUendProgram() {
+microVU* mVU = mVUx;
+incCycles(55); // Ensures Valid P/Q instances
+mVUcycles -= 55;
+if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
+SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ);
+SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2);
+SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
+AND32ItoM((uptr)&microVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
+AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
+MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC);
+JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
+}
+microVUt(void) mVUtestCycles() {
+microVU* mVU = mVUx;
+iPC = mVUstartPC;
+CMP32ItoM((uptr)&mVU->cycles, 0);
+u8* jmp8 = JG8(0);
+mVUendProgram<vuIndex>();
+x86SetJ8(jmp8);
+SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
+}
 //------------------------------------------------------------------
 // Recompiler
 //------------------------------------------------------------------
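
Spelled out as plain C++, the run-time effect of the two new functions looks roughly like this (stand-in types and names; the real versions are emitted x86):

struct MicroVUStub { int cycles; unsigned tpc; };  // hypothetical stand-in state

static void endProgramStub(MicroVUStub& mVU, unsigned pc) {
    // mVUendProgram: flush the current P/Q instances to REG_P/REG_Q, clear the
    // VBS bit in VPU_STAT and the vif 'VU is busy' bit, record the resume PC
    // in REG_TPC, then jump to the dispatcher exit.
    mVU.tpc = pc;
}

static void blockPrologue(MicroVUStub& mVU, int blockCycleCost, unsigned pc) {
    if (mVU.cycles <= 0)            // CMP32ItoM + JG8 in the emitted code
        endProgramStub(mVU, pc);    // budget exhausted: suspend mid-program
    mVU.cycles -= blockCycleCost;   // SUB32ItoM: charge this block up front
}

Since mVUtestCycles rewinds iPC to mVUstartPC before emitting anything, this guard lands at the top of the block, ahead of the instructions the second pass generates.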
@@ -245,17 +269,15 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 u8* thisPtr = x86Ptr;
 if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); }
-//startPC &= (vuIndex ? 0x3ff8 : 0xff8);
-//mVUlog("mVUcompile Search");
+startPC &= (vuIndex ? 0x3ff8 : 0xff8);
 // Searches for Existing Compiled Block (if found, then returns; else, compile)
 microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
 if (pBlock) { return pBlock->x86ptrStart; }
-//mVUlog("mVUcompile First Pass");
 // First Pass
 iPC = startPC / 4;
 setCode();
 mVUbranch = 0;
 mVUstartPC = iPC;
 mVUcount = 0;
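
The newly uncommented mask keeps the entry PC inside micro memory and 8-byte aligned, which appears to be why the equivalent masking at the runVUrec call sites and on the indirect-jump path below could be dropped. A small illustration:

static unsigned maskStartPC(unsigned pc, bool vu1) {
    // clamp into micro memory (16KB for VU1, 4KB for VU0) and clear the low
    // 3 bits so the PC stays aligned to a 64-bit instruction pair
    return pc & (vu1 ? 0x3ff8u : 0xff8u);
}
// e.g. maskStartPC(0x123A, true) == 0x1238, maskStartPC(0x4005, true) == 0x0000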
@@ -286,23 +308,19 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 mVUcount++;
 }
-//mVUlog("mVUcompile mVUsetFlags");
 // Sets Up Flag instances
 int bStatus[4]; int bMac[4];
 mVUsetFlags<vuIndex>(bStatus, bMac);
-//mVUlog("mVUcompile Second Pass");
-//write8(0xcc);
+mVUtestCycles<vuIndex>();
 // Second Pass
 iPC = mVUstartPC;
 setCode();
 mVUbranch = 0;
 int x;
 for (x = 0; x < (vuIndex ? (0x3fff/8) : (0xfff/8)); x++) {
 if (isEOB) { x = 0xffff; }
-if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(1); } }
+if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
 else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL<vuIndex, 1>(); incPC(1); }
 else { mVUopL<vuIndex, 1>(); incPC(1); doUpperOp(); }
@@ -336,7 +354,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 PUSH32R(gprR); // Backup EDX
 MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
-AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address
+//AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address
 MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
 if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
@@ -372,18 +390,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUlog("microVU: Possible infinite compiling loop!"); }
 // Do E-bit end stuff here
-incCycles(55); // Ensures Valid P/Q instances
-mVUcycles -= 55;
-if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
-SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ);
-SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2);
-SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
-AND32ItoM((uptr)&microVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
-AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
-MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC);
-JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
+mVUendProgram<vuIndex>();
 //ToDo: Save pipeline state?
 return thisPtr;
 }

View File

@@ -130,8 +130,9 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
 microVU* mVU = mVUx;
 //mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles);
-// ToDo: Implement Cycles
 mVUsearchProg(mVU); // Find and set correct program
+mVU->cycles = cycles;
+mVU->totalCycles = cycles;
 x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off
 if (!vuIndex) return mVUcompileVU0(startPC, (uptr)&mVU->prog.lpState);
@@ -144,7 +145,7 @@
 microVUt(void) mVUcleanUp() {
 microVU* mVU = mVUx;
-//mVUlog("microVU: Program exited successfully!");
+mVUlog("microVU: Program exited successfully!");
 mVUcurProg.x86ptr = x86Ptr;
 mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
 }

View File

@@ -666,7 +666,7 @@ microVUf(void) mVU_MFIR() {
 MOVSX32R16toR(gprT1, gprT1);
 SSE2_MOVD_R_to_XMM(xmmT1, gprT1);
 if (!_XYZW_SS) { mVUunpack_xyzw<vuIndex>(xmmT1, xmmT1, 0); }
-mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
@@ -676,7 +676,7 @@ microVUf(void) mVU_MFP() {
 else {
 mVUlog("MFP");
 getPreg(xmmFt);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
@@ -686,7 +686,7 @@ microVUf(void) mVU_MOVE() {
 else {
 mVUlog("MOVE");
 mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
@@ -697,7 +697,7 @@ microVUf(void) mVU_MR32() {
 mVUlog("MR32");
 mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 4 : 15);
 if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); }
-mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 0);
 }
 }
@@ -819,7 +819,7 @@ microVUf(void) mVU_LQ() {
 if (!_Fs_) {
 mVUlog("LQ1");
 mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 else {
 mVUlog("LQ2");
@@ -827,7 +827,7 @@
 ADD32ItoR(gprT1, _Imm11_);
 mVUaddrFix<vuIndex>(gprT1);
 mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
 }
@@ -839,7 +839,7 @@ microVUf(void) mVU_LQD() {
 if (!_Fs_ && !noWriteVF) {
 mVUlog("LQD1");
 mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 else {
 mVUlog("LQD2");
@@ -849,7 +849,7 @@
 if (!noWriteVF) {
 mVUaddrFix<vuIndex>(gprT1);
 mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
 }
@@ -862,7 +862,7 @@ microVUf(void) mVU_LQI() {
 if (!_Fs_ && !noWriteVF) {
 mVUlog("LQI1");
 mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 else {
 mVUlog("LQI2");
@@ -871,7 +871,7 @@
 MOV32RtoR(gprT2, gprT1);
 mVUaddrFix<vuIndex>(gprT1);
 mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 ADD16ItoR(gprT2, 1);
 mVUallocVIb<vuIndex>(gprT2, _Fs_);
@@ -890,7 +890,7 @@ microVUf(void) mVU_SQ() {
 mVUlog("SQ");
 if (!_Ft_) {
 getReg7(xmmFs, _Fs_);
-mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1);
 }
 else {
 mVUallocVIa<vuIndex>(gprT1, _Ft_);
@@ -909,7 +909,7 @@ microVUf(void) mVU_SQD() {
 mVUlog("SQD");
 if (!_Ft_) {
 getReg7(xmmFs, _Fs_);
-mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
 }
 else {
 mVUallocVIa<vuIndex>(gprT1, _Ft_);
@@ -929,7 +929,7 @@ microVUf(void) mVU_SQI() {
 mVUlog("SQI");
 if (!_Ft_) {
 getReg7(xmmFs, _Fs_);
-mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
 }
 else {
 mVUallocVIa<vuIndex>(gprT1, _Ft_);

View File

@@ -94,7 +94,16 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
 }
 // Modifies the Source Reg!
-microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
+microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
+/*SSE_MOVAPS_M128_to_XMM(xmmT2, offset);
+if (modXYZW && (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1)) {
+mVUunpack_xyzw<vuIndex>(reg, reg, 0);
+}
+mVUmergeRegs<vuIndex>(xmmT2, reg, xyzw);
+SSE_MOVAPS_XMM_to_M128(offset, xmmT2);
+return;*/
 switch ( xyzw ) {
 case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
 SSE_MOVSS_XMM_to_M32(offset+4, reg);
@@ -127,10 +136,16 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
 SSE_MOVHLPS_XMM_to_XMM(reg, reg);
 SSE_MOVSS_XMM_to_M32(offset+8, reg);
 break; // XYZ
+case 4: if (!modXYZW) mVUunpack_xyzw<vuIndex>(reg, reg, 1);
+SSE_MOVSS_XMM_to_M32(offset+4, reg);
+break; // Y
+case 2: if (!modXYZW) mVUunpack_xyzw<vuIndex>(reg, reg, 2);
+SSE_MOVSS_XMM_to_M32(offset+8, reg);
+break; // Z
+case 1: if (!modXYZW) mVUunpack_xyzw<vuIndex>(reg, reg, 3);
+SSE_MOVSS_XMM_to_M32(offset+12, reg);
+break; // W
 case 8: SSE_MOVSS_XMM_to_M32(offset, reg); break; // X
-case 4: SSE_MOVSS_XMM_to_M32(offset+4, reg); break; // Y
-case 2: SSE_MOVSS_XMM_to_M32(offset+8, reg); break; // Z
-case 1: SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // W
 case 12: SSE_MOVLPS_XMM_to_M64(offset, reg); break; // XY
 case 3: SSE_MOVHPS_XMM_to_M64(offset+8, reg); break; // ZW
 default: SSE_MOVAPS_XMM_to_M128(offset, reg); break; // XYZW
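
A sketch of why the new modXYZW flag matters in the single-field cases above (illustrative stand-in, not the emitter code): MOVSS stores the low lane of the register, so when the caller has not already arranged the value there (modXYZW == 0), a Z-only write, say, must first shuffle lane z down with mVUunpack_xyzw before storing to offset+8.

static void saveRegZ(const float xmm[4], float* destZ, bool modXYZW) {
    // modXYZW set: the scalar result already sits in the low lane (the SS
    // paths above skip the broadcast), so it can be stored directly;
    // otherwise the Z component is still in lane z and is brought down first.
    *destZ = modXYZW ? xmm[0] : xmm[2];   // stands in for MOVSS to offset+8
}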
@@ -139,6 +154,14 @@
 // Modifies the Source Reg!
 microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
+/*SSE_MOVAPSRmtoR(xmmT2, gprReg, offset);
+if (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1) {
+mVUunpack_xyzw<vuIndex>(reg, reg, 0);
+}
+mVUmergeRegs<vuIndex>(xmmT2, reg, xyzw);
+SSE_MOVAPSRtoRm(gprReg, xmmT2, offset);
+return;*/
 switch ( xyzw ) {
 case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
 SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);