- Fixed the Rm-form emitter calls (e.g. MOV32RmtoR, SSE_MOVSS_Rm_to_XMM) to work with Jake's emitter changes
- Implemented the case where the upper and lower instructions write to the same register in the same cycle (the lower instruction's result is discarded)
- Implemented more first-pass analysis (R register and XGKICK tracking)
- Fixed various bugs

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@924 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-04-08 10:07:14 +00:00
parent 3dd99a0932
commit e3a87fecd9
9 changed files with 157 additions and 110 deletions

View File

@ -41,7 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
mVU->regs = vuRegsPtr; mVU->regs = vuRegsPtr;
mVU->index = vuIndex; mVU->index = vuIndex;
mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000); mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000);
mVU->cache = NULL; mVU->cache = NULL;

View File

@ -91,9 +91,9 @@ public:
template<u32 progSize> template<u32 progSize>
struct microProgram { struct microProgram {
u32 data[progSize]; u32 data[progSize/4];
u32 used; // Number of times its been used u32 used; // Number of times its been used
microBlockManager* block[progSize / 2]; microBlockManager* block[progSize/8];
microAllocInfo<progSize> allocInfo; microAllocInfo<progSize> allocInfo;
}; };
@ -116,7 +116,7 @@ struct microVU {
u32 cacheAddr; // VU Cache Start Address u32 cacheAddr; // VU Cache Start Address
static const u32 cacheSize = 0x500000; // VU Cache Size static const u32 cacheSize = 0x500000; // VU Cache Size
microProgManager<0x1000> prog; // Micro Program Data microProgManager<0x4000> prog; // Micro Program Data
VURegs* regs; // VU Regs Struct VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)

View File

@ -33,6 +33,8 @@ struct microRegInfo {
u8 VI[32]; u8 VI[32];
u8 q; u8 q;
u8 p; u8 p;
u8 r;
u8 xgkick;
}; };
struct microTempRegInfo { struct microTempRegInfo {
@ -42,6 +44,8 @@ struct microTempRegInfo {
u8 VIreg; // Index of the VI reg u8 VIreg; // Index of the VI reg
u8 q; // Holds cycle info for Q reg u8 q; // Holds cycle info for Q reg
u8 p; // Holds cycle info for P reg u8 p; // Holds cycle info for P reg
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
u8 xgkick; // Holds the cycle info for XGkick
}; };
template<u32 pSize> template<u32 pSize>
@ -49,34 +53,9 @@ struct microAllocInfo {
microRegInfo regs; // Pipeline info microRegInfo regs; // Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR)
u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag
u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time.
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
u32 cycles; // Cycles for current block u32 cycles; // Cycles for current block
u32 curPC; // Current PC u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block u32 startPC; // Start PC for Cur Block
u32 info[pSize]; // bit 00 = Lower Instruction is NOP u32 info[pSize/8]; // Info for Instructions in current block
// bit 01
// bit 02
// bit 03
// bit 04
// bit 05 = Write to Q1 or Q2?
// bit 06 = Read Q1 or Q2?
// bit 07 = Read/Write to P1 or P2?
// bit 08 = Update Mac Flags?
// bit 09 = Update Status Flags?
// bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance
// bit 11
// bit 12 = Used with bit 13 to make a 2-bit key for status flag instance
// bit 13
// bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance
// bit 15
// bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance
// bit 17
// bit 18 = Used with bit 19 to make a 2-bit key for status flag instance
// bit 19
// bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance
// bit 21
// bit 22 = Read VI(Fs) from backup memory?
// bit 23 = Read VI(Ft) from backup memory?
}; };

View File

@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
// FMAC6 - Normal FMAC Opcodes (I Reg) // FMAC6 - Normal FMAC Opcodes (I Reg)
//------------------------------------------------------------------ //------------------------------------------------------------------
#define getIreg(reg) { \ #define getIreg(reg, modXYZW) { \
MOV32ItoR(gprT1, mVU->iReg); \ MOV32ItoR(gprT1, mVU->iReg); \
SSE2_MOVD_R_to_XMM(reg, gprT1); \ SSE2_MOVD_R_to_XMM(reg, gprT1); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
if (!_XYZW_SS) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \ if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
} }
microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
Fs = xmmFs; Fs = xmmFs;
Ft = xmmFt; Ft = xmmFt;
Fd = xmmFs; Fd = xmmFs;
getIreg(Ft); getIreg(Ft, 1);
getReg6(Fs, _Fs_); getReg6(Fs, _Fs_);
} }
@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) {
ACC = xmmACC; ACC = xmmACC;
Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs; Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs;
Ft = xmmFt; Ft = xmmFt;
getIreg(Ft); getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); } else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); } else { getReg4(Fs, _Fs_); }
@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt; Ft = xmmFt;
Fd = xmmFs; Fd = xmmFs;
ACC = xmmACC; ACC = xmmACC;
getIreg(Ft); getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); } else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); } else { getReg4(Fs, _Fs_); }
@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) {
Fd = xmmT1; Fd = xmmT1;
ACC = xmmT1; ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
getIreg(Ft); getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); } else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); } else { getReg4(Fs, _Fs_); }
@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
ACCw = xmmACC; ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
getIreg(Ft); getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); } else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); } else { getReg4(Fs, _Fs_); }

View File

@ -143,6 +143,25 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) {
analyzePreg(xCycles); analyzePreg(xCycles);
} }
//------------------------------------------------------------------
// R*** - R Reg Opcodes
//------------------------------------------------------------------
// Flags the R register as modified by the current instruction in the temp pipeline info.
#define analyzeRreg() { mVUregsTemp.r = 1; }
// First-pass analysis for R-register opcodes that take one VF field (Fs.Fsf) as their source.
// Invoked by the RINIT and RXOR handlers when recPass is 0.
//   Fs  - source VF register index
//   Fsf - field selector within Fs
microVUt(void) mVUanalyzeR1(int Fs, int Fsf) {
microVU* mVU = mVUx; // NOTE(review): the mVUregs*/mVUinfo macros appear to expand through this local - confirm
analyzeReg5(Fs, Fsf); // macro defined outside this view; presumably accounts for a pending write to Fs.Fsf - TODO confirm
analyzeRreg();
}
// First-pass analysis for R-register opcodes whose destination is VF[Ft].
// RGET calls this with canBeNOP = 1, RNEXT with canBeNOP = 0.
//   Ft       - destination VF register index
//   canBeNOP - when the destination index is 0: true marks the whole lower op as a NOP,
//              false only suppresses the VF write-back
microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
    microVU* mVU = mVUx;
    if (Ft == 0) {
        // Destination index 0: nothing gets written to a VF reg, so just tag the instruction and stop.
        if (canBeNOP) { mVUinfo |= _isNOP; }
        else          { mVUinfo |= _noWriteVF; }
        return;
    }
    analyzeReg2(Ft);
    analyzeRreg();
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// Sflag - Status Flag Opcodes // Sflag - Status Flag Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -157,4 +176,18 @@ microVUt(void) mVUanalyzeSflag(int It) {
analyzeVIreg2(It, 1); analyzeVIreg2(It, 1);
} }
//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
// Folds the cycle count recorded for XGkick into the current stall
// (assuming aMax yields the larger of its two arguments - defined outside this view).
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
// Stores x as the XGkick cycle info in the temp pipeline info.
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
// First-pass analysis for XGKICK; mVU_XGKICK calls this with (_Fs_, 4) when recPass is 0.
//   Fs      - register index handed to analyzeVIreg1
//   xCycles - value recorded as the new XGkick cycle info
microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) {
microVU* mVU = mVUx; // NOTE(review): the mVUregs*/mVUstall macros appear to expand through this local - confirm
analyzeVIreg1(Fs); // macro defined outside this view; presumably handles the read of the VI register - TODO confirm
analyzeXGkick1();
analyzeXGkick2(xCycles);
}
#endif //PCSX2_MICROVU #endif //PCSX2_MICROVU

View File

@ -29,19 +29,51 @@
} \ } \
} }
#define curI mVUcurProg.data[iPC] #define curI mVUcurProg.data[iPC]
#define setCode() { mVU->code = curI; } #define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } #define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
// Advances the first-pass pipeline bookkeeping by x cycles (forwards to mVUincCycles).
#define incCycles(x) { mVUincCycles<vuIndex>(x); }
// Subtracts x from a remaining-cycle counter, saturating at 0.
// x is parenthesized so that expression arguments expand safely.
#define calcCycles(reg, x) { reg = ((reg > (x)) ? (reg - (x)) : 0); }

// Adds x cycles to the current block's cycle count and ages every tracked
// pipeline counter (VF fields, VI regs, Q, P, R, XGkick) by the same amount.
//   x - number of cycles that have elapsed
microVUt(void) mVUincCycles(int x) {
    // Declared for the mVUcycles/mVUregs accessors, matching every sibling function that uses them.
    microVU* mVU = mVUx;
    mVUcycles += x;
    // Index 0 is skipped in both register loops.
    for (int z = 31; z > 0; z--) {
        calcCycles(mVUregs.VF[z].x, x);
        calcCycles(mVUregs.VF[z].y, x);
        calcCycles(mVUregs.VF[z].z, x);
        calcCycles(mVUregs.VF[z].w, x);
    }
    // NOTE(review): this walks VI[16]..VI[1]; if only VI[1]..VI[15] are meant to be
    // tracked the start index should be 15 - confirm. Left unchanged here.
    for (int z = 16; z > 0; z--) {
        calcCycles(mVUregs.VI[z], x);
    }
    if (mVUregs.q) {
        calcCycles(mVUregs.q, x);
        if (!mVUregs.q) {} // Do Status Flag Merging Stuff?
    }
    calcCycles(mVUregs.p, x);
    calcCycles(mVUregs.r, x);
    calcCycles(mVUregs.xgkick, x);
}
microVUt(void) mVUsetCycles() { microVUt(void) mVUsetCycles() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
incCycles(mVUstall); incCycles(mVUstall);
// Upper op and lower op write to the same VF reg in this cycle: the lower op's VF result is discarded.
// startLoop() zeroes mVUregsTemp, so a VFreg index of 0 on both sides is also what an
// instruction pair with no VF write looks like; the index must therefore be non-zero to count as a collision.
if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) {
    mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP
    //mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?)
    mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?)
    mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y);
    mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z);
    mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w);
}
mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg;
mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg;
mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI;
mVUregs.q = mVUregsTemp.q; mVUregs.q = mVUregsTemp.q;
mVUregs.p = mVUregsTemp.p; mVUregs.p = mVUregsTemp.p;
mVUregs.r = mVUregsTemp.r;
mVUregs.xgkick = mVUregsTemp.xgkick;
} }
microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
@ -70,14 +102,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
else if (branch == 1) { branch = 2; } else if (branch == 1) { branch = 2; }
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
incPC(1); incPC(1);
incCycles(1);
} }
// Second Pass // Second Pass
iPC = startPC; iPC = mVUstartPC;
setCode(); setCode();
for (bool x = 1; x; ) { for (bool x = 1; x; ) {
// //
// ToDo: status/mac flag stuff // ToDo: status/mac flag stuff?
// //
if (isEOB) { x = 0; } if (isEOB) { x = 0; }
else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); } else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); }
@ -85,6 +118,7 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
mVUopU<vuIndex, 1>(); mVUopU<vuIndex, 1>();
if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else { incPC(1); mVUopL<vuIndex, 1>(); } else { incPC(1); mVUopL<vuIndex, 1>(); }
if (!isBdelay) { incPC(1); } if (!isBdelay) { incPC(1); }
else { else {
incPC(-2); // Go back to Branch Opcode incPC(-2); // Go back to Branch Opcode

View File

@ -648,18 +648,9 @@ microVUf(void) mVU_ISUBIU() {
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
// MOVE/MFIR/MFP/MTIR/MR32 // MFIR/MFP/MOVE/MR32/MTIR
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
else {
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MFIR() { microVUf(void) mVU_MFIR() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ } if (!recPass) { /*If (!_Ft_) nop();*/ }
@ -681,12 +672,12 @@ microVUf(void) mVU_MFP() {
} }
} }
microVUf(void) mVU_MTIR() { microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
else { else {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
mVUallocVIb<vuIndex>(gprT1, _Ft_); mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
} }
} }
@ -700,6 +691,15 @@ microVUf(void) mVU_MR32() {
} }
} }
// MTIR handler. Nothing is done unless recPass is set; in that case it emits
// a MOVZX32M16toR from VF[_Fs_].UL[_Fsf_] into gprT1, then hands gprT1 to
// mVUallocVIb for VI register _Ft_.
microVUf(void) mVU_MTIR() {
    microVU* mVU = mVUx;
    if (recPass) {
        MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
        mVUallocVIb<vuIndex>(gprT1, _Ft_);
    }
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// ILW/ILWR // ILW/ILWR
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -716,7 +716,7 @@ microVUf(void) mVU_ILW() {
mVUallocVIa<vuIndex>(gprT1, _Fs_); mVUallocVIa<vuIndex>(gprT1, _Fs_);
ADD32ItoR(gprT1, _Imm11_); ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_); mVUallocVIb<vuIndex>(gprT1, _Ft_);
} }
@ -728,13 +728,13 @@ microVUf(void) mVU_ILWR() {
if (!recPass) { /*If (!_Ft_) nop();*/ } if (!recPass) { /*If (!_Ft_) nop();*/ }
else { else {
if (!_Fs_) { if (!_Fs_) {
MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS ); MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS);
mVUallocVIb<vuIndex>(gprT1, _Ft_); mVUallocVIb<vuIndex>(gprT1, _Ft_);
} }
else { else {
mVUallocVIa<vuIndex>(gprT1, _Fs_); mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_); mVUallocVIb<vuIndex>(gprT1, _Ft_);
} }
@ -762,10 +762,10 @@ microVUf(void) mVU_ISW() {
mVUallocVIa<vuIndex>(gprT2, _Ft_); mVUallocVIa<vuIndex>(gprT2, _Ft_);
ADD32ItoR(gprT1, _Imm11_); ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
} }
} }
} }
@ -785,10 +785,10 @@ microVUf(void) mVU_ISWR() {
mVUallocVIa<vuIndex>(gprT1, _Fs_); mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_); mVUallocVIa<vuIndex>(gprT2, _Ft_);
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
} }
} }
} }
@ -819,7 +819,7 @@ microVUf(void) mVU_LQD() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) {}
else { else {
if (!_Fs_ && _Ft_) { if (!_Fs_ && _Ft_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
} }
@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() {
mVUallocVIa<vuIndex>(gprT1, _Fs_); mVUallocVIa<vuIndex>(gprT1, _Fs_);
SUB16ItoR(gprT1, 1); SUB16ItoR(gprT1, 1);
mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check. mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check.
if (_Ft_) { if (_Ft_ && !noWriteVF) {
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
@ -840,13 +840,13 @@ microVUf(void) mVU_LQI() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) {}
else { else {
if (!_Fs_ && _Ft_) { if (!_Fs_ && _Ft_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
} }
else { else {
mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_); mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_);
if (_Ft_) { if (_Ft_ && !noWriteVF) {
MOV32RtoR(gprT2, gprT1); MOV32RtoR(gprT2, gprT1);
mVUaddrFix<vuIndex>(gprT1); mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
@ -925,7 +925,7 @@ microVUf(void) mVU_SQI() {
microVUf(void) mVU_RINIT() { microVUf(void) mVU_RINIT() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else { else {
if (_Fs_ || (_Fsf_ == 3)) { if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprR, _Fs_, _Fsf_); getReg8(gprR, _Fs_, _Fsf_);
@ -938,7 +938,7 @@ microVUf(void) mVU_RINIT() {
microVUt(void) mVU_RGET_() { microVUt(void) mVU_RGET_() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (_Ft_) { if (!noWriteVF) {
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR); if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR); if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR); if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
@ -948,13 +948,13 @@ microVUt(void) mVU_RGET_() {
microVUf(void) mVU_RGET() { microVUf(void) mVU_RGET() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ } if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 1); }
else { mVU_RGET_<vuIndex>(); } else { mVU_RGET_<vuIndex>(); }
} }
microVUf(void) mVU_RNEXT() { microVUf(void) mVU_RNEXT() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ } if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 0); }
else { else {
// algorithm from www.project-fao.org // algorithm from www.project-fao.org
MOV32RtoR(gprT1, gprR); MOV32RtoR(gprT1, gprR);
@ -976,7 +976,7 @@ microVUf(void) mVU_RNEXT() {
microVUf(void) mVU_RXOR() { microVUf(void) mVU_RXOR() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else { else {
if (_Fs_ || (_Fsf_ == 3)) { if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprT1, _Fs_, _Fsf_); getReg8(gprT1, _Fs_, _Fsf_);
@ -1039,7 +1039,7 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
microVUf(void) mVU_XGKICK() { microVUf(void) mVU_XGKICK() {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (!recPass) {} if (!recPass) { mVUanalyzeXGkick<vuIndex>(_Fs_, 4); }
else { else {
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);

View File

@ -152,7 +152,6 @@ declareAllVariables
#define mVUstartPC mVUallocInfo.startPC #define mVUstartPC mVUallocInfo.startPC
#define iPC mVUallocInfo.curPC #define iPC mVUallocInfo.curPC
#define xPC ((iPC / 2) * 8) #define xPC ((iPC / 2) * 8)
#define incCycles(x) { mVUcycles += x; }
#define _isNOP (1<<0) // Skip Lower Instruction #define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch #define _isBranch (1<<1) // Cur Instruction is a Branch
@ -168,12 +167,13 @@ declareAllVariables
#define _doStatus (1<<9) #define _doStatus (1<<9)
#define _fmInstance (3<<10) #define _fmInstance (3<<10)
#define _fsInstance (3<<12) #define _fsInstance (3<<12)
#define _fcInstance (3<<14)
#define _fpmInstance (3<<10)
#define _fpsInstance (3<<12) #define _fpsInstance (3<<12)
#define _fcInstance (3<<14)
#define _fpcInstance (3<<14)
#define _fvmInstance (3<<16) #define _fvmInstance (3<<16)
#define _fvsInstance (3<<18) #define _fvsInstance (3<<18)
#define _fvcInstance (3<<14) #define _fvcInstance (3<<20)
// NOTE(review): bit 21 is also the high bit of the 2-bit _fvcInstance field (3<<20), so setting this flag changes that field's value - confirm which bit was intended.
#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
#define isNOP (mVUinfo & (1<<0)) #define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1)) #define isBranch (mVUinfo & (1<<1))
@ -195,6 +195,7 @@ declareAllVariables
#define fvmInstance ((mVUinfo >> 16) & 3) #define fvmInstance ((mVUinfo >> 16) & 3)
#define fvsInstance ((mVUinfo >> 18) & 3) #define fvsInstance ((mVUinfo >> 18) & 3)
#define fvcInstance ((mVUinfo >> 20) & 3) #define fvcInstance ((mVUinfo >> 20) & 3)
// NOTE(review): tests bit 21, which fvcInstance ((mVUinfo >> 20) & 3) also reads as its high bit - confirm the intended bit.
#define noWriteVF (mVUinfo & (1<<21))
//#define getFs (mVUinfo & (1<<13)) //#define getFs (mVUinfo & (1<<13))
//#define getFt (mVUinfo & (1<<14)) //#define getFt (mVUinfo & (1<<14))

View File

@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) {
microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
switch( xyzw ) { switch( xyzw ) {
case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X
case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y
case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z
case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W
default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break; default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break;
} }
} }
@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
switch ( xyzw ) { switch ( xyzw ) {
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YW break; // YW
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
break; // YZ break; // YZ
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YZW break; // YZW
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XW break; // XW
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; //XZ break; //XZ
case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZW break; //XZW
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1); SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XYW break; // XYW
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; // XYZ break; // XYZ
case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X
case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y
case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z
case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W
case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY
case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW
default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW
} }
} }