-fixed Rm instructions to work with Jake's emitter changes
-implemented the case where the upper and lower instructions of a pair write to the same reg in the same cycle (the lower instruction's result is discarded; sketched below)
-implemented more first-pass analysis (R reg and XGkick pipeline tracking)
-fixed various bugs...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@924 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-04-08 10:07:14 +00:00
parent 3dd99a0932
commit e3a87fecd9
9 changed files with 157 additions and 110 deletions
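
In rough terms, the new same-register rule works as sketched here. This is a minimal standalone illustration with made-up names (PairInfo, resolveSameRegWrite), not the recompiler's API; the real check lives in mVUsetCycles in the microVU_Compile.inl hunk further down, and gates on VFreg[0] slightly differently.

#include <cstdio>

enum : unsigned { kIsNOP = 1u << 0, kNoWriteVF = 1u << 21 }; // mirrors _isNOP / _noWriteVF

struct PairInfo {
    int  upperVF;             // VF reg written by the upper instruction (0 = none)
    int  lowerVF;             // VF reg written by the lower instruction (0 = none)
    bool lowerHasSideEffects; // lower op also writes R or a VI reg
};

// If both halves of an instruction pair target the same VF register, the lower
// instruction's result is discarded: either the whole lower op becomes a NOP,
// or (if it has other side effects) only its VF write-back is suppressed.
unsigned resolveSameRegWrite(const PairInfo& p) {
    if (p.upperVF && p.upperVF == p.lowerVF)
        return p.lowerHasSideEffects ? kNoWriteVF : kIsNOP;
    return 0;
}

int main() {
    std::printf("0x%x\n", resolveSameRegWrite({ 5, 5, false })); // prints 0x1: lower op NOP'd
}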

View File

@@ -41,7 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
mVU->regs = vuRegsPtr;
mVU->index = vuIndex;
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
-mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8;
+mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000);
mVU->cache = NULL;

View File

@@ -91,9 +91,9 @@ public:
template<u32 progSize>
struct microProgram {
-u32 data[progSize];
+u32 data[progSize/4];
u32 used; // Number of times it's been used
-microBlockManager* block[progSize / 2];
+microBlockManager* block[progSize/8];
microAllocInfo<progSize> allocInfo;
};
@@ -116,7 +116,7 @@ struct microVU {
u32 cacheAddr; // VU Cache Start Address
static const u32 cacheSize = 0x500000; // VU Cache Size
-microProgManager<0x1000> prog; // Micro Program Data
+microProgManager<0x4000> prog; // Micro Program Data
VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)

View File

@@ -33,6 +33,8 @@ struct microRegInfo {
u8 VI[32];
u8 q;
u8 p;
+u8 r;
+u8 xgkick;
};
struct microTempRegInfo {
@@ -42,6 +44,8 @@ struct microTempRegInfo {
u8 VIreg; // Index of the VI reg
u8 q; // Holds cycle info for Q reg
u8 p; // Holds cycle info for P reg
+u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
+u8 xgkick; // Holds the cycle info for XGkick
};
template<u32 pSize>
@@ -49,34 +53,9 @@ struct microAllocInfo {
microRegInfo regs; // Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR)
-u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag
-u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time.
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
u32 cycles; // Cycles for current block
u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block
-u32 info[pSize]; // bit 00 = Lower Instruction is NOP
-// bit 01
-// bit 02
-// bit 03
-// bit 04
-// bit 05 = Write to Q1 or Q2?
-// bit 06 = Read Q1 or Q2?
-// bit 07 = Read/Write to P1 or P2?
-// bit 08 = Update Mac Flags?
-// bit 09 = Update Status Flags?
-// bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance
-// bit 11
-// bit 12 = Used with bit 13 to make a 2-bit key for status flag instance
-// bit 13
-// bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance
-// bit 15
-// bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance
-// bit 17
-// bit 18 = Used with bit 19 to make a 2-bit key for status flag instance
-// bit 19
-// bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance
-// bit 21
-// bit 22 = Read VI(Fs) from backup memory?
-// bit 23 = Read VI(Ft) from backup memory?
+u32 info[pSize/8]; // Info for Instructions in current block
};
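
The /4 and /8 divisors above are plain unit arithmetic: the template parameter is now a size in bytes, each VU instruction word is a 32-bit u32, and each upper/lower instruction pair is 8 bytes. A quick compile-time sanity check of the sizes used in this diff:

#include <cstdint>

constexpr uint32_t microSize  = 0x4000;         // VU1 micro memory, in bytes
constexpr uint32_t dataWords  = microSize / 4;  // one u32 per instruction word
constexpr uint32_t blockSlots = microSize / 8;  // one microBlockManager* per upper/lower pair

static_assert(dataWords  == 0x1000, "0x4000 bytes = 0x1000 32-bit words");
static_assert(blockSlots == 0x800,  "0x4000 bytes = 0x800 64-bit instruction pairs");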

View File

@@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
// FMAC6 - Normal FMAC Opcodes (I Reg)
//------------------------------------------------------------------
-#define getIreg(reg) { \
+#define getIreg(reg, modXYZW) { \
MOV32ItoR(gprT1, mVU->iReg); \
SSE2_MOVD_R_to_XMM(reg, gprT1); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
-if (!_XYZW_SS) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
+if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
}
microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
@@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
Fs = xmmFs;
Ft = xmmFt;
Fd = xmmFs;
-getIreg(Ft);
+getIreg(Ft, 1);
getReg6(Fs, _Fs_);
}
@@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) {
ACC = xmmACC;
Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs;
Ft = xmmFt;
-getIreg(Ft);
+getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmFs;
ACC = xmmACC;
-getIreg(Ft);
+getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) {
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
-getIreg(Ft);
+getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
-getIreg(Ft);
+getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
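
For reference, what the new modXYZW parameter controls: getIreg moves the 32-bit I register into the x lane of an XMM register and then broadcasts it to all four lanes, unless the consuming op only reads a single component. An equivalent sketch in SSE intrinsics (loadIreg and scalarOnly are illustrative names; scalarOnly stands in for the ((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8)) test):

#include <cstdint>
#include <immintrin.h>

static inline __m128 loadIreg(uint32_t iReg, bool scalarOnly) {
    __m128 v = _mm_castsi128_ps(_mm_cvtsi32_si128((int)iReg)); // MOVD: I reg -> lane x
    if (!scalarOnly)
        v = _mm_shuffle_ps(v, v, 0x00);                        // splat lane x across xyzw
    return v;
}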

View File

@@ -143,6 +143,25 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) {
analyzePreg(xCycles);
}
+//------------------------------------------------------------------
+// R*** - R Reg Opcodes
+//------------------------------------------------------------------
+#define analyzeRreg() { mVUregsTemp.r = 1; }
+microVUt(void) mVUanalyzeR1(int Fs, int Fsf) {
+microVU* mVU = mVUx;
+analyzeReg5(Fs, Fsf);
+analyzeRreg();
+}
+microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
+microVU* mVU = mVUx;
+if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); return; }
+analyzeReg2(Ft);
+analyzeRreg();
+}
//------------------------------------------------------------------
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
@@ -157,4 +176,18 @@ microVUt(void) mVUanalyzeSflag(int It) {
analyzeVIreg2(It, 1);
}
+//------------------------------------------------------------------
+// XGkick
+//------------------------------------------------------------------
+#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
+#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
+microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) {
+microVU* mVU = mVUx;
+analyzeVIreg1(Fs);
+analyzeXGkick1();
+analyzeXGkick2(xCycles);
+}
#endif //PCSX2_MICROVU

View File

@@ -29,19 +29,51 @@
} \
}
-#define curI mVUcurProg.data[iPC]
-#define setCode() { mVU->code = curI; }
-#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
-#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
+#define curI mVUcurProg.data[iPC]
+#define setCode() { mVU->code = curI; }
+#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
+#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
+#define incCycles(x) { mVUincCycles<vuIndex>(x); }
+#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
+microVUt(void) mVUincCycles(int x) {
+mVUcycles += x;
+for (int z = 31; z > 0; z--) {
+calcCycles(mVUregs.VF[z].x, x);
+calcCycles(mVUregs.VF[z].y, x);
+calcCycles(mVUregs.VF[z].z, x);
+calcCycles(mVUregs.VF[z].w, x);
+}
+for (int z = 16; z > 0; z--) {
+calcCycles(mVUregs.VI[z], x);
+}
+if (mVUregs.q) {
+calcCycles(mVUregs.q, x);
+if (!mVUregs.q) {} // Do Status Flag Merging Stuff?
+}
+calcCycles(mVUregs.p, x);
+calcCycles(mVUregs.r, x);
+calcCycles(mVUregs.xgkick, x);
+}
microVUt(void) mVUsetCycles() {
microVU* mVU = mVUx;
incCycles(mVUstall);
+if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
+mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP
+//mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?)
+mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?)
+mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y);
+mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z);
+mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w);
+}
mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg;
-mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg);
+mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg;
mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI;
mVUregs.q = mVUregsTemp.q;
mVUregs.p = mVUregsTemp.p;
+mVUregs.r = mVUregsTemp.r;
+mVUregs.xgkick = mVUregsTemp.xgkick;
}
microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
@@ -70,14 +102,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
else if (branch == 1) { branch = 2; }
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
incPC(1);
+incCycles(1);
}
// Second Pass
-iPC = startPC;
+iPC = mVUstartPC;
setCode();
for (bool x = 1; x; ) {
//
-// ToDo: status/mac flag stuff
+// ToDo: status/mac flag stuff?
//
if (isEOB) { x = 0; }
else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); }
@@ -85,6 +118,7 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
mVUopU<vuIndex, 1>();
+if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else { incPC(1); mVUopL<vuIndex, 1>(); }
if (!isBdelay) { incPC(1); }
else {
incPC(-2); // Go back to Branch Opcode
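
The first-pass cycle model added here is a per-register countdown: each pipeline slot stores "cycles until this result is ready", mVUincCycles ticks every slot toward zero each cycle, and a reader stalls for whatever latency is still outstanding. A standalone rendering of the idea (helper names are illustrative):

#include <algorithm>
#include <cstdint>

// Saturating countdown, as in the calcCycles macro above.
static inline void countDown(uint8_t& reg, int x) {
    reg = (reg > x) ? uint8_t(reg - x) : 0;
}

// Stall computation in the style of analyzeXGkick1: wait for the larger of the
// already-pending stall and this register's remaining latency.
static inline int stallFor(uint8_t reg, int pendingStall) {
    return std::max<int>(pendingStall, reg);
}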

View File

@@ -648,18 +648,9 @@ microVUf(void) mVU_ISUBIU() {
}
//------------------------------------------------------------------
-// MOVE/MFIR/MFP/MTIR/MR32
+// MFIR/MFP/MOVE/MR32/MTIR
//------------------------------------------------------------------
-microVUf(void) mVU_MOVE() {
-microVU* mVU = mVUx;
-if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
-else {
-mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
-mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
-}
-}
microVUf(void) mVU_MFIR() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
@@ -681,12 +672,12 @@ microVUf(void) mVU_MFP() {
}
}
-microVUf(void) mVU_MTIR() {
+microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx;
-if (!recPass) {}
+if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
else {
-MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
-mVUallocVIb<vuIndex>(gprT1, _Ft_);
+mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
+mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
@@ -700,6 +691,15 @@ microVUf(void) mVU_MR32() {
}
}
+microVUf(void) mVU_MTIR() {
+microVU* mVU = mVUx;
+if (!recPass) {}
+else {
+MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
+mVUallocVIb<vuIndex>(gprT1, _Ft_);
+}
+}
//------------------------------------------------------------------
// ILW/ILWR
//------------------------------------------------------------------
@@ -716,7 +716,7 @@ microVUf(void) mVU_ILW() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1);
-MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
+MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
@@ -728,13 +728,13 @@ microVUf(void) mVU_ILWR() {
if (!recPass) { /*If (!_Ft_) nop();*/ }
else {
if (!_Fs_) {
-MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS );
+MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUaddrFix<vuIndex>(gprT1);
-MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
+MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
@@ -762,10 +762,10 @@ microVUf(void) mVU_ISW() {
mVUallocVIa<vuIndex>(gprT2, _Ft_);
ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1);
-if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
-if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
-if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
-if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
+if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
+if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
+if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
+if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
}
}
}
@@ -785,10 +785,10 @@ microVUf(void) mVU_ISWR() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
mVUaddrFix<vuIndex>(gprT1);
-if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
-if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
-if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
-if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
+if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
+if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
+if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
+if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
}
}
}
@@ -819,7 +819,7 @@ microVUf(void) mVU_LQD() {
microVU* mVU = mVUx;
if (!recPass) {}
else {
-if (!_Fs_ && _Ft_) {
+if (!_Fs_ && _Ft_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
@@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
SUB16ItoR(gprT1, 1);
mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check.
-if (_Ft_) {
+if (_Ft_ && !noWriteVF) {
mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
@@ -840,13 +840,13 @@ microVUf(void) mVU_LQI() {
microVU* mVU = mVUx;
if (!recPass) {}
else {
-if (!_Fs_ && _Ft_) {
+if (!_Fs_ && _Ft_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
else {
mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_);
-if (_Ft_) {
+if (_Ft_ && !noWriteVF) {
MOV32RtoR(gprT2, gprT1);
mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
@@ -925,7 +925,7 @@ microVUf(void) mVU_SQI() {
microVUf(void) mVU_RINIT() {
microVU* mVU = mVUx;
-if (!recPass) {}
+if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else {
if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprR, _Fs_, _Fsf_);
@@ -938,7 +938,7 @@ microVUf(void) mVU_RINIT() {
microVUt(void) mVU_RGET_() {
microVU* mVU = mVUx;
-if (_Ft_) {
+if (!noWriteVF) {
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
@@ -948,13 +948,13 @@ microVUt(void) mVU_RGET_() {
microVUf(void) mVU_RGET() {
microVU* mVU = mVUx;
-if (!recPass) { /*if (!_Ft_) nop();*/ }
+if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 1); }
else { mVU_RGET_<vuIndex>(); }
}
microVUf(void) mVU_RNEXT() {
microVU* mVU = mVUx;
-if (!recPass) { /*if (!_Ft_) nop();*/ }
+if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 0); }
else {
// algorithm from www.project-fao.org
MOV32RtoR(gprT1, gprR);
@@ -976,7 +976,7 @@ microVUf(void) mVU_RNEXT() {
microVUf(void) mVU_RXOR() {
microVU* mVU = mVUx;
-if (!recPass) {}
+if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else {
if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprT1, _Fs_, _Fsf_);
@@ -1039,7 +1039,7 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
microVUf(void) mVU_XGKICK() {
microVU* mVU = mVUx;
-if (!recPass) {}
+if (!recPass) { mVUanalyzeXGkick<vuIndex>(_Fs_, 4); }
else {
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);

View File

@@ -152,7 +152,6 @@ declareAllVariables
#define mVUstartPC mVUallocInfo.startPC
#define iPC mVUallocInfo.curPC
#define xPC ((iPC / 2) * 8)
-#define incCycles(x) { mVUcycles += x; }
#define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch
@@ -168,12 +167,13 @@ declareAllVariables
#define _doStatus (1<<9)
-#define _fmInstance (3<<10)
-#define _fsInstance (3<<12)
-#define _fcInstance (3<<14)
+#define _fpmInstance (3<<10)
+#define _fpsInstance (3<<12)
+#define _fpcInstance (3<<14)
#define _fvmInstance (3<<16)
#define _fvsInstance (3<<18)
-#define _fvcInstance (3<<14)
+#define _fvcInstance (3<<20)
+#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
#define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1))
@@ -195,6 +195,7 @@ declareAllVariables
#define fvmInstance ((mVUinfo >> 16) & 3)
#define fvsInstance ((mVUinfo >> 18) & 3)
#define fvcInstance ((mVUinfo >> 20) & 3)
+#define noWriteVF (mVUinfo & (1<<21))
//#define getFs (mVUinfo & (1<<13))
//#define getFt (mVUinfo & (1<<14))

View File

@@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) {
microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
switch( xyzw ) {
-case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X
-case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y
-case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z
-case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W
-default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break;
+case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X
+case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y
+case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z
+case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W
+default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break;
}
}
@@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
switch ( xyzw ) {
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YW
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
-SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
+SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
break; // YZ
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
-SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YZW
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
+SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XW
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
+SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; //XZ
-case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
-SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);
+case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
+SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZW
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
-SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XYW
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);
-SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
+SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
+SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; // XYZ
-case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X
-case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y
-case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z
-case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W
-case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY
-case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW
-default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW
+case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X
+case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y
+case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z
+case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W
+case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY
+case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW
+default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW
}
}
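
For reference, the xyzw value switched on throughout mVUsaveReg2 is a 4-bit write mask (X=8, Y=4, Z=2, W=1), and each SSE case above is just a fast path for a masked 4-float store. A plain-C reference version (saveRegRef is an illustrative name):

static void saveRegRef(const float src[4], float dst[4], int xyzw) {
    if (xyzw & 8) dst[0] = src[0]; // X -> offset+0
    if (xyzw & 4) dst[1] = src[1]; // Y -> offset+4
    if (xyzw & 2) dst[2] = src[2]; // Z -> offset+8
    if (xyzw & 1) dst[3] = src[3]; // W -> offset+12
}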