mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- fixed rm instructions to work with Jake's emitter changes
- implemented the case where upper and lower instructions write to the same reg at once (the lower instruction's result is discarded)
- implemented more first-pass analyzing stuff
- fixed various bugs...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@924 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3dd99a0932
commit
e3a87fecd9
|
@ -41,7 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
|
||||||
mVU->regs = vuRegsPtr;
|
mVU->regs = vuRegsPtr;
|
||||||
mVU->index = vuIndex;
|
mVU->index = vuIndex;
|
||||||
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
|
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
|
||||||
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8;
|
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
|
||||||
mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000);
|
mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000);
|
||||||
mVU->cache = NULL;
|
mVU->cache = NULL;
|
||||||
|
|
||||||
|
|
|
@ -91,9 +91,9 @@ public:
|
||||||
|
|
||||||
template<u32 progSize>
|
template<u32 progSize>
|
||||||
struct microProgram {
|
struct microProgram {
|
||||||
u32 data[progSize];
|
u32 data[progSize/4];
|
||||||
u32 used; // Number of times its been used
|
u32 used; // Number of times its been used
|
||||||
microBlockManager* block[progSize / 2];
|
microBlockManager* block[progSize/8];
|
||||||
microAllocInfo<progSize> allocInfo;
|
microAllocInfo<progSize> allocInfo;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -116,7 +116,7 @@ struct microVU {
|
||||||
u32 cacheAddr; // VU Cache Start Address
|
u32 cacheAddr; // VU Cache Start Address
|
||||||
static const u32 cacheSize = 0x500000; // VU Cache Size
|
static const u32 cacheSize = 0x500000; // VU Cache Size
|
||||||
|
|
||||||
microProgManager<0x1000> prog; // Micro Program Data
|
microProgManager<0x4000> prog; // Micro Program Data
|
||||||
|
|
||||||
VURegs* regs; // VU Regs Struct
|
VURegs* regs; // VU Regs Struct
|
||||||
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
|
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
|
||||||
|
|
|
@ -33,6 +33,8 @@ struct microRegInfo {
|
||||||
u8 VI[32];
|
u8 VI[32];
|
||||||
u8 q;
|
u8 q;
|
||||||
u8 p;
|
u8 p;
|
||||||
|
u8 r;
|
||||||
|
u8 xgkick;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct microTempRegInfo {
|
struct microTempRegInfo {
|
||||||
|
@ -42,6 +44,8 @@ struct microTempRegInfo {
|
||||||
u8 VIreg; // Index of the VI reg
|
u8 VIreg; // Index of the VI reg
|
||||||
u8 q; // Holds cycle info for Q reg
|
u8 q; // Holds cycle info for Q reg
|
||||||
u8 p; // Holds cycle info for P reg
|
u8 p; // Holds cycle info for P reg
|
||||||
|
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
||||||
|
u8 xgkick; // Holds the cycle info for XGkick
|
||||||
};
|
};
|
||||||
|
|
||||||
template<u32 pSize>
|
template<u32 pSize>
|
||||||
|
@ -49,34 +53,9 @@ struct microAllocInfo {
|
||||||
microRegInfo regs; // Pipeline info
|
microRegInfo regs; // Pipeline info
|
||||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||||
u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR)
|
u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR)
|
||||||
u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag
|
|
||||||
u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time.
|
|
||||||
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
|
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
|
||||||
u32 cycles; // Cycles for current block
|
u32 cycles; // Cycles for current block
|
||||||
u32 curPC; // Current PC
|
u32 curPC; // Current PC
|
||||||
u32 startPC; // Start PC for Cur Block
|
u32 startPC; // Start PC for Cur Block
|
||||||
u32 info[pSize]; // bit 00 = Lower Instruction is NOP
|
u32 info[pSize/8]; // Info for Instructions in current block
|
||||||
// bit 01
|
|
||||||
// bit 02
|
|
||||||
// bit 03
|
|
||||||
// bit 04
|
|
||||||
// bit 05 = Write to Q1 or Q2?
|
|
||||||
// bit 06 = Read Q1 or Q2?
|
|
||||||
// bit 07 = Read/Write to P1 or P2?
|
|
||||||
// bit 08 = Update Mac Flags?
|
|
||||||
// bit 09 = Update Status Flags?
|
|
||||||
// bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance
|
|
||||||
// bit 11
|
|
||||||
// bit 12 = Used with bit 13 to make a 2-bit key for status flag instance
|
|
||||||
// bit 13
|
|
||||||
// bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance
|
|
||||||
// bit 15
|
|
||||||
// bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance
|
|
||||||
// bit 17
|
|
||||||
// bit 18 = Used with bit 19 to make a 2-bit key for status flag instance
|
|
||||||
// bit 19
|
|
||||||
// bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance
|
|
||||||
// bit 21
|
|
||||||
// bit 22 = Read VI(Fs) from backup memory?
|
|
||||||
// bit 23 = Read VI(Ft) from backup memory?
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
|
||||||
// FMAC6 - Normal FMAC Opcodes (I Reg)
|
// FMAC6 - Normal FMAC Opcodes (I Reg)
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
#define getIreg(reg) { \
|
#define getIreg(reg, modXYZW) { \
|
||||||
MOV32ItoR(gprT1, mVU->iReg); \
|
MOV32ItoR(gprT1, mVU->iReg); \
|
||||||
SSE2_MOVD_R_to_XMM(reg, gprT1); \
|
SSE2_MOVD_R_to_XMM(reg, gprT1); \
|
||||||
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
|
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
|
||||||
if (!_XYZW_SS) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
|
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
|
microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
|
||||||
|
@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
|
||||||
Fs = xmmFs;
|
Fs = xmmFs;
|
||||||
Ft = xmmFt;
|
Ft = xmmFt;
|
||||||
Fd = xmmFs;
|
Fd = xmmFs;
|
||||||
getIreg(Ft);
|
getIreg(Ft, 1);
|
||||||
getReg6(Fs, _Fs_);
|
getReg6(Fs, _Fs_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) {
|
||||||
ACC = xmmACC;
|
ACC = xmmACC;
|
||||||
Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs;
|
Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs;
|
||||||
Ft = xmmFt;
|
Ft = xmmFt;
|
||||||
getIreg(Ft);
|
getIreg(Ft, 0);
|
||||||
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
||||||
else if (!_Fs_) { getZero4(Fs); }
|
else if (!_Fs_) { getZero4(Fs); }
|
||||||
else { getReg4(Fs, _Fs_); }
|
else { getReg4(Fs, _Fs_); }
|
||||||
|
@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) {
|
||||||
Ft = xmmFt;
|
Ft = xmmFt;
|
||||||
Fd = xmmFs;
|
Fd = xmmFs;
|
||||||
ACC = xmmACC;
|
ACC = xmmACC;
|
||||||
getIreg(Ft);
|
getIreg(Ft, 0);
|
||||||
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
||||||
else if (!_Fs_) { getZero4(Fs); }
|
else if (!_Fs_) { getZero4(Fs); }
|
||||||
else { getReg4(Fs, _Fs_); }
|
else { getReg4(Fs, _Fs_); }
|
||||||
|
@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) {
|
||||||
Fd = xmmT1;
|
Fd = xmmT1;
|
||||||
ACC = xmmT1;
|
ACC = xmmT1;
|
||||||
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
|
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
|
||||||
getIreg(Ft);
|
getIreg(Ft, 0);
|
||||||
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
||||||
else if (!_Fs_) { getZero4(Fs); }
|
else if (!_Fs_) { getZero4(Fs); }
|
||||||
else { getReg4(Fs, _Fs_); }
|
else { getReg4(Fs, _Fs_); }
|
||||||
|
@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
|
||||||
ACCw = xmmACC;
|
ACCw = xmmACC;
|
||||||
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
|
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
|
||||||
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
|
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
|
||||||
getIreg(Ft);
|
getIreg(Ft, 0);
|
||||||
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
|
||||||
else if (!_Fs_) { getZero4(Fs); }
|
else if (!_Fs_) { getZero4(Fs); }
|
||||||
else { getReg4(Fs, _Fs_); }
|
else { getReg4(Fs, _Fs_); }
|
||||||
|
|
|
@ -143,6 +143,25 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) {
|
||||||
analyzePreg(xCycles);
|
analyzePreg(xCycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
// R*** - R Reg Opcodes
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
#define analyzeRreg() { mVUregsTemp.r = 1; }
|
||||||
|
|
||||||
|
microVUt(void) mVUanalyzeR1(int Fs, int Fsf) {
|
||||||
|
microVU* mVU = mVUx;
|
||||||
|
analyzeReg5(Fs, Fsf);
|
||||||
|
analyzeRreg();
|
||||||
|
}
|
||||||
|
|
||||||
|
microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
|
||||||
|
microVU* mVU = mVUx;
|
||||||
|
if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); return; }
|
||||||
|
analyzeReg2(Ft);
|
||||||
|
analyzeRreg();
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Sflag - Status Flag Opcodes
|
// Sflag - Status Flag Opcodes
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -157,4 +176,18 @@ microVUt(void) mVUanalyzeSflag(int It) {
|
||||||
analyzeVIreg2(It, 1);
|
analyzeVIreg2(It, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
// XGkick
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
|
||||||
|
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
|
||||||
|
|
||||||
|
microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) {
|
||||||
|
microVU* mVU = mVUx;
|
||||||
|
analyzeVIreg1(Fs);
|
||||||
|
analyzeXGkick1();
|
||||||
|
analyzeXGkick2(xCycles);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //PCSX2_MICROVU
|
#endif //PCSX2_MICROVU
|
||||||
|
|
|
@ -29,19 +29,51 @@
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define curI mVUcurProg.data[iPC]
|
#define curI mVUcurProg.data[iPC]
|
||||||
#define setCode() { mVU->code = curI; }
|
#define setCode() { mVU->code = curI; }
|
||||||
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
|
||||||
#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
|
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
||||||
|
#define incCycles(x) { mVUincCycles<vuIndex>(x); }
|
||||||
|
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
|
||||||
|
|
||||||
|
microVUt(void) mVUincCycles(int x) {
|
||||||
|
mVUcycles += x;
|
||||||
|
for (int z = 31; z > 0; z--) {
|
||||||
|
calcCycles(mVUregs.VF[z].x, x);
|
||||||
|
calcCycles(mVUregs.VF[z].y, x);
|
||||||
|
calcCycles(mVUregs.VF[z].z, x);
|
||||||
|
calcCycles(mVUregs.VF[z].w, x);
|
||||||
|
}
|
||||||
|
for (int z = 16; z > 0; z--) {
|
||||||
|
calcCycles(mVUregs.VI[z], x);
|
||||||
|
}
|
||||||
|
if (mVUregs.q) {
|
||||||
|
calcCycles(mVUregs.q, x);
|
||||||
|
if (!mVUregs.q) {} // Do Status Flag Merging Stuff?
|
||||||
|
}
|
||||||
|
calcCycles(mVUregs.p, x);
|
||||||
|
calcCycles(mVUregs.r, x);
|
||||||
|
calcCycles(mVUregs.xgkick, x);
|
||||||
|
}
|
||||||
|
|
||||||
microVUt(void) mVUsetCycles() {
|
microVUt(void) mVUsetCycles() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
incCycles(mVUstall);
|
incCycles(mVUstall);
|
||||||
|
if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
|
||||||
|
mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP
|
||||||
|
//mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?)
|
||||||
|
mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?)
|
||||||
|
mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y);
|
||||||
|
mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z);
|
||||||
|
mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w);
|
||||||
|
}
|
||||||
mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg;
|
mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg;
|
||||||
mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg);
|
mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg;
|
||||||
mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI;
|
mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI;
|
||||||
mVUregs.q = mVUregsTemp.q;
|
mVUregs.q = mVUregsTemp.q;
|
||||||
mVUregs.p = mVUregsTemp.p;
|
mVUregs.p = mVUregsTemp.p;
|
||||||
|
mVUregs.r = mVUregsTemp.r;
|
||||||
|
mVUregs.xgkick = mVUregsTemp.xgkick;
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
|
microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
|
||||||
|
@ -70,14 +102,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
|
||||||
else if (branch == 1) { branch = 2; }
|
else if (branch == 1) { branch = 2; }
|
||||||
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
|
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
|
||||||
incPC(1);
|
incPC(1);
|
||||||
|
incCycles(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second Pass
|
// Second Pass
|
||||||
iPC = startPC;
|
iPC = mVUstartPC;
|
||||||
setCode();
|
setCode();
|
||||||
for (bool x = 1; x; ) {
|
for (bool x = 1; x; ) {
|
||||||
//
|
//
|
||||||
// ToDo: status/mac flag stuff
|
// ToDo: status/mac flag stuff?
|
||||||
//
|
//
|
||||||
if (isEOB) { x = 0; }
|
if (isEOB) { x = 0; }
|
||||||
else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); }
|
else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); }
|
||||||
|
@ -85,6 +118,7 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
|
||||||
mVUopU<vuIndex, 1>();
|
mVUopU<vuIndex, 1>();
|
||||||
if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
|
if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
|
||||||
else { incPC(1); mVUopL<vuIndex, 1>(); }
|
else { incPC(1); mVUopL<vuIndex, 1>(); }
|
||||||
|
|
||||||
if (!isBdelay) { incPC(1); }
|
if (!isBdelay) { incPC(1); }
|
||||||
else {
|
else {
|
||||||
incPC(-2); // Go back to Branch Opcode
|
incPC(-2); // Go back to Branch Opcode
|
||||||
|
|
|
@ -648,18 +648,9 @@ microVUf(void) mVU_ISUBIU() {
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// MOVE/MFIR/MFP/MTIR/MR32
|
// MFIR/MFP/MOVE/MR32/MTIR
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
microVUf(void) mVU_MOVE() {
|
|
||||||
microVU* mVU = mVUx;
|
|
||||||
if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
|
|
||||||
else {
|
|
||||||
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
|
|
||||||
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
microVUf(void) mVU_MFIR() {
|
microVUf(void) mVU_MFIR() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) { /*If (!_Ft_) nop();*/ }
|
if (!recPass) { /*If (!_Ft_) nop();*/ }
|
||||||
|
@ -681,12 +672,12 @@ microVUf(void) mVU_MFP() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUf(void) mVU_MTIR() {
|
microVUf(void) mVU_MOVE() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
|
||||||
else {
|
else {
|
||||||
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
|
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -700,6 +691,15 @@ microVUf(void) mVU_MR32() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
microVUf(void) mVU_MTIR() {
|
||||||
|
microVU* mVU = mVUx;
|
||||||
|
if (!recPass) {}
|
||||||
|
else {
|
||||||
|
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
|
||||||
|
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// ILW/ILWR
|
// ILW/ILWR
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -716,7 +716,7 @@ microVUf(void) mVU_ILW() {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
ADD32ItoR(gprT1, _Imm11_);
|
ADD32ItoR(gprT1, _Imm11_);
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
|
MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
|
||||||
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
|
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
||||||
}
|
}
|
||||||
|
@ -728,13 +728,13 @@ microVUf(void) mVU_ILWR() {
|
||||||
if (!recPass) { /*If (!_Ft_) nop();*/ }
|
if (!recPass) { /*If (!_Ft_) nop();*/ }
|
||||||
else {
|
else {
|
||||||
if (!_Fs_) {
|
if (!_Fs_) {
|
||||||
MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS );
|
MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
|
MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
|
||||||
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
|
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
mVUallocVIb<vuIndex>(gprT1, _Ft_);
|
||||||
}
|
}
|
||||||
|
@ -762,10 +762,10 @@ microVUf(void) mVU_ISW() {
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
ADD32ItoR(gprT1, _Imm11_);
|
ADD32ItoR(gprT1, _Imm11_);
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
|
if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
|
||||||
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
|
if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
|
||||||
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
|
if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
|
||||||
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
|
if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -785,10 +785,10 @@ microVUf(void) mVU_ISWR() {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
|
if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
|
||||||
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
|
if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
|
||||||
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
|
if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
|
||||||
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
|
if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -819,7 +819,7 @@ microVUf(void) mVU_LQD() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) {}
|
||||||
else {
|
else {
|
||||||
if (!_Fs_ && _Ft_) {
|
if (!_Fs_ && _Ft_ && !noWriteVF) {
|
||||||
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
||||||
}
|
}
|
||||||
|
@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
SUB16ItoR(gprT1, 1);
|
SUB16ItoR(gprT1, 1);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check.
|
mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check.
|
||||||
if (_Ft_) {
|
if (_Ft_ && !noWriteVF) {
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
||||||
|
@ -840,13 +840,13 @@ microVUf(void) mVU_LQI() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) {}
|
||||||
else {
|
else {
|
||||||
if (!_Fs_ && _Ft_) {
|
if (!_Fs_ && _Ft_ && !noWriteVF) {
|
||||||
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_);
|
mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_);
|
||||||
if (_Ft_) {
|
if (_Ft_ && !noWriteVF) {
|
||||||
MOV32RtoR(gprT2, gprT1);
|
MOV32RtoR(gprT2, gprT1);
|
||||||
mVUaddrFix<vuIndex>(gprT1);
|
mVUaddrFix<vuIndex>(gprT1);
|
||||||
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
|
@ -925,7 +925,7 @@ microVUf(void) mVU_SQI() {
|
||||||
|
|
||||||
microVUf(void) mVU_RINIT() {
|
microVUf(void) mVU_RINIT() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
|
||||||
else {
|
else {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
getReg8(gprR, _Fs_, _Fsf_);
|
getReg8(gprR, _Fs_, _Fsf_);
|
||||||
|
@ -938,7 +938,7 @@ microVUf(void) mVU_RINIT() {
|
||||||
|
|
||||||
microVUt(void) mVU_RGET_() {
|
microVUt(void) mVU_RGET_() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (_Ft_) {
|
if (!noWriteVF) {
|
||||||
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
|
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
|
||||||
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
|
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
|
||||||
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
|
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
|
||||||
|
@ -948,13 +948,13 @@ microVUt(void) mVU_RGET_() {
|
||||||
|
|
||||||
microVUf(void) mVU_RGET() {
|
microVUf(void) mVU_RGET() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) { /*if (!_Ft_) nop();*/ }
|
if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 1); }
|
||||||
else { mVU_RGET_<vuIndex>(); }
|
else { mVU_RGET_<vuIndex>(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUf(void) mVU_RNEXT() {
|
microVUf(void) mVU_RNEXT() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) { /*if (!_Ft_) nop();*/ }
|
if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 0); }
|
||||||
else {
|
else {
|
||||||
// algorithm from www.project-fao.org
|
// algorithm from www.project-fao.org
|
||||||
MOV32RtoR(gprT1, gprR);
|
MOV32RtoR(gprT1, gprR);
|
||||||
|
@ -976,7 +976,7 @@ microVUf(void) mVU_RNEXT() {
|
||||||
|
|
||||||
microVUf(void) mVU_RXOR() {
|
microVUf(void) mVU_RXOR() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
|
||||||
else {
|
else {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
getReg8(gprT1, _Fs_, _Fsf_);
|
getReg8(gprT1, _Fs_, _Fsf_);
|
||||||
|
@ -1039,7 +1039,7 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
|
||||||
|
|
||||||
microVUf(void) mVU_XGKICK() {
|
microVUf(void) mVU_XGKICK() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (!recPass) {}
|
if (!recPass) { mVUanalyzeXGkick<vuIndex>(_Fs_, 4); }
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
|
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
|
||||||
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);
|
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);
|
||||||
|
|
|
@ -152,7 +152,6 @@ declareAllVariables
|
||||||
#define mVUstartPC mVUallocInfo.startPC
|
#define mVUstartPC mVUallocInfo.startPC
|
||||||
#define iPC mVUallocInfo.curPC
|
#define iPC mVUallocInfo.curPC
|
||||||
#define xPC ((iPC / 2) * 8)
|
#define xPC ((iPC / 2) * 8)
|
||||||
#define incCycles(x) { mVUcycles += x; }
|
|
||||||
|
|
||||||
#define _isNOP (1<<0) // Skip Lower Instruction
|
#define _isNOP (1<<0) // Skip Lower Instruction
|
||||||
#define _isBranch (1<<1) // Cur Instruction is a Branch
|
#define _isBranch (1<<1) // Cur Instruction is a Branch
|
||||||
|
@ -168,12 +167,13 @@ declareAllVariables
|
||||||
#define _doStatus (1<<9)
|
#define _doStatus (1<<9)
|
||||||
#define _fmInstance (3<<10)
|
#define _fmInstance (3<<10)
|
||||||
#define _fsInstance (3<<12)
|
#define _fsInstance (3<<12)
|
||||||
#define _fcInstance (3<<14)
|
|
||||||
#define _fpmInstance (3<<10)
|
|
||||||
#define _fpsInstance (3<<12)
|
#define _fpsInstance (3<<12)
|
||||||
|
#define _fcInstance (3<<14)
|
||||||
|
#define _fpcInstance (3<<14)
|
||||||
#define _fvmInstance (3<<16)
|
#define _fvmInstance (3<<16)
|
||||||
#define _fvsInstance (3<<18)
|
#define _fvsInstance (3<<18)
|
||||||
#define _fvcInstance (3<<14)
|
#define _fvcInstance (3<<20)
|
||||||
|
#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
|
||||||
|
|
||||||
#define isNOP (mVUinfo & (1<<0))
|
#define isNOP (mVUinfo & (1<<0))
|
||||||
#define isBranch (mVUinfo & (1<<1))
|
#define isBranch (mVUinfo & (1<<1))
|
||||||
|
@ -195,6 +195,7 @@ declareAllVariables
|
||||||
#define fvmInstance ((mVUinfo >> 16) & 3)
|
#define fvmInstance ((mVUinfo >> 16) & 3)
|
||||||
#define fvsInstance ((mVUinfo >> 18) & 3)
|
#define fvsInstance ((mVUinfo >> 18) & 3)
|
||||||
#define fvcInstance ((mVUinfo >> 20) & 3)
|
#define fvcInstance ((mVUinfo >> 20) & 3)
|
||||||
|
#define noWriteVF (mVUinfo & (1<<21))
|
||||||
|
|
||||||
//#define getFs (mVUinfo & (1<<13))
|
//#define getFs (mVUinfo & (1<<13))
|
||||||
//#define getFt (mVUinfo & (1<<14))
|
//#define getFt (mVUinfo & (1<<14))
|
||||||
|
|
|
@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) {
|
||||||
|
|
||||||
microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
|
microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
|
||||||
switch( xyzw ) {
|
switch( xyzw ) {
|
||||||
case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X
|
case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X
|
||||||
case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y
|
case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y
|
||||||
case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z
|
case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z
|
||||||
case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W
|
case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W
|
||||||
default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break;
|
default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
|
||||||
switch ( xyzw ) {
|
switch ( xyzw ) {
|
||||||
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
|
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
|
||||||
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);
|
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
|
||||||
break; // YW
|
break; // YW
|
||||||
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
|
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
|
||||||
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
|
SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
|
||||||
break; // YZ
|
break; // YZ
|
||||||
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
|
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
|
||||||
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
|
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
|
||||||
break; // YZW
|
break; // YZW
|
||||||
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
|
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||||
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
|
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
|
||||||
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
|
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
|
||||||
break; // XW
|
break; // XW
|
||||||
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
|
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
|
||||||
break; //XZ
|
break; //XZ
|
||||||
case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
|
case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||||
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);
|
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
|
||||||
break; //XZW
|
break; //XZW
|
||||||
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
|
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
|
||||||
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1);
|
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
|
||||||
break; // XYW
|
break; // XYW
|
||||||
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
|
||||||
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);
|
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
|
||||||
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
|
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
|
||||||
break; // XYZ
|
break; // XYZ
|
||||||
case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X
|
case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X
|
||||||
case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y
|
case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y
|
||||||
case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z
|
case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z
|
||||||
case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W
|
case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W
|
||||||
case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY
|
case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY
|
||||||
case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW
|
case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW
|
||||||
default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW
|
default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue