- Gave the rec a better IR (intermediate representation) implementation.
- Renamed microVU_Alloc to microVU_IR


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1298 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-05-31 05:45:59 +00:00
parent 1694eecf2f
commit eb3b31c525
12 changed files with 248 additions and 252 deletions

View File

@ -2038,10 +2038,6 @@
RelativePath="..\..\x86\microVU.h" RelativePath="..\..\x86\microVU.h"
> >
</File> </File>
<File
RelativePath="..\..\x86\microVU_Alloc.h"
>
</File>
<File <File
RelativePath="..\..\x86\microVU_Alloc.inl" RelativePath="..\..\x86\microVU_Alloc.inl"
> >
@ -2062,6 +2058,10 @@
RelativePath="..\..\x86\microVU_Flags.inl" RelativePath="..\..\x86\microVU_Flags.inl"
> >
</File> </File>
<File
RelativePath="..\..\x86\microVU_IR.h"
>
</File>
<File <File
RelativePath="..\..\x86\microVU_Log.inl" RelativePath="..\..\x86\microVU_Log.inl"
> >

View File

@ -24,7 +24,7 @@
#include "VU.h" #include "VU.h"
#include "GS.h" #include "GS.h"
#include "ix86/ix86.h" #include "ix86/ix86.h"
#include "microVU_Alloc.h" #include "microVU_IR.h"
#include "microVU_Misc.h" #include "microVU_Misc.h"
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...) #define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
@ -80,7 +80,7 @@ struct microProgram {
u8* x86start; // Start of program's rec-cache u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache u8* x86end; // Limit of program's rec-cache
microBlockManager* block[progSize/2]; microBlockManager* block[progSize/2];
microAllocInfo<progSize> allocInfo; microIR<progSize> allocInfo;
}; };
#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...) #define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)

View File

@ -537,7 +537,7 @@ microVUt(void) mVUallocFMAC19b(mV, int& Fd) {
//------------------------------------------------------------------ //------------------------------------------------------------------
#define getQreg(reg) { \ #define getQreg(reg) { \
mVUunpack_xyzw(reg, xmmPQ, readQ); \ mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); \
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/ \ /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/ \
} }
@ -686,7 +686,7 @@ microVUt(void) mVUallocVIa(mV, int GPRreg, int _reg_) {
} }
microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) { microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
if (backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch)
MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg); MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg);
mVUallocVIa(mVU, GPRreg, _reg_); mVUallocVIa(mVU, GPRreg, _reg_);
MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg); MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg);
@ -702,7 +702,7 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
//------------------------------------------------------------------ //------------------------------------------------------------------
#define getPreg(reg) { \ #define getPreg(reg) { \
mVUunpack_xyzw(reg, xmmPQ, (2 + readP)); \ mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); \
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \
} }

View File

@ -56,13 +56,13 @@
|| (mVUregsTemp.VF[0].y && _Y) \ || (mVUregsTemp.VF[0].y && _Y) \
|| (mVUregsTemp.VF[0].z && _Z) \ || (mVUregsTemp.VF[0].z && _Z) \
|| (mVUregsTemp.VF[0].w && _W)) \ || (mVUregsTemp.VF[0].w && _W)) \
{ mVUinfo |= _swapOps; } \ { mVUinfo.swapOps = 1; } \
} \ } \
} \ } \
} }
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
mVUinfo |= _doStatus; sFLAG.doFlag = 1;
analyzeReg1(Fs); analyzeReg1(Fs);
analyzeReg1(Ft); analyzeReg1(Ft);
analyzeReg2(Fd, 0); analyzeReg2(Fd, 0);
@ -91,7 +91,7 @@ microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) {
} }
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
mVUinfo |= _doStatus; sFLAG.doFlag = 1;
analyzeReg1(Fs); analyzeReg1(Fs);
analyzeReg3(Ft); analyzeReg3(Ft);
analyzeReg2(Fd, 0); analyzeReg2(Fd, 0);
@ -106,7 +106,7 @@ microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
} }
microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft) {
mVUinfo |= _doClip; cFLAG.doFlag = 1;
analyzeReg1(Fs); analyzeReg1(Fs);
analyzeReg4(Ft); analyzeReg4(Ft);
} }
@ -116,18 +116,18 @@ microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft) {
//------------------------------------------------------------------ //------------------------------------------------------------------
#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } #define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } }
#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } #define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUlow.writesVI = 1; mVU->VIbackup[0] = reg; } }
#define analyzeVIreg3(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } } #define analyzeVIreg3(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } }
microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It) { microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It) {
if (!Id) { mVUinfo |= _isNOP; } if (!Id) { mVUlow.isNOP = 1; }
analyzeVIreg1(Is); analyzeVIreg1(Is);
analyzeVIreg1(It); analyzeVIreg1(It);
analyzeVIreg2(Id, 1); analyzeVIreg2(Id, 1);
} }
microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) { microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
if (!It) { mVUinfo |= _isNOP; } if (!It) { mVUlow.isNOP = 1; }
analyzeVIreg1(Is); analyzeVIreg1(Is);
analyzeVIreg2(It, 1); analyzeVIreg2(It, 1);
} }
@ -148,13 +148,13 @@ microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
|| (mVUregsTemp.VF[0].z && _Y) \ || (mVUregsTemp.VF[0].z && _Y) \
|| (mVUregsTemp.VF[0].w && _Z) \ || (mVUregsTemp.VF[0].w && _Z) \
|| (mVUregsTemp.VF[0].x && _W)) \ || (mVUregsTemp.VF[0].x && _W)) \
{ mVUinfo |= _swapOps; } \ { mVUinfo.swapOps = 1; } \
} \ } \
} \ } \
} }
microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft) { microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft) {
if (!Ft) { mVUinfo |= _isNOP; } if (!Ft) { mVUlow.isNOP = 1; }
analyzeReg6(Fs); analyzeReg6(Fs);
analyzeReg2(Ft, 1); analyzeReg2(Ft, 1);
} }
@ -176,7 +176,7 @@ microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft) {
|| (mVUregsTemp.VF[0].y && (fxf == 1)) \ || (mVUregsTemp.VF[0].y && (fxf == 1)) \
|| (mVUregsTemp.VF[0].z && (fxf == 2)) \ || (mVUregsTemp.VF[0].z && (fxf == 2)) \
|| (mVUregsTemp.VF[0].w && (fxf == 3))) \ || (mVUregsTemp.VF[0].w && (fxf == 3))) \
{ mVUinfo |= _swapOps; } \ { mVUinfo.swapOps = 1; } \
} \ } \
} \ } \
} }
@ -212,7 +212,7 @@ microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles) {
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUanalyzeMFP(mV, int Ft) { microVUt(void) mVUanalyzeMFP(mV, int Ft) {
if (!Ft) { mVUinfo |= _isNOP; } if (!Ft) { mVUlow.isNOP = 1; }
analyzeReg2(Ft, 1); analyzeReg2(Ft, 1);
} }
@ -221,7 +221,7 @@ microVUt(void) mVUanalyzeMFP(mV, int Ft) {
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) { microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) {
if (!Ft || (Ft == Fs)) { mVUinfo |= _isNOP; } if (!Ft || (Ft == Fs)) { mVUlow.isNOP = 1; }
analyzeReg1b(Fs); analyzeReg1b(Fs);
analyzeReg2(Ft, 1); analyzeReg2(Ft, 1);
} }
@ -234,7 +234,7 @@ microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) {
microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) {
analyzeVIreg1(Is); analyzeVIreg1(Is);
analyzeReg2(Ft, 1); analyzeReg2(Ft, 1);
if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; } if (!Ft) { if (writeIs && Is) { mVUlow.noWriteVF = 1; } else { mVUlow.isNOP = 1; } }
if (writeIs) { analyzeVIreg2(Is, 1); } if (writeIs) { analyzeVIreg2(Is, 1); }
} }
@ -260,7 +260,7 @@ microVUt(void) mVUanalyzeR1(mV, int Fs, int Fsf) {
} }
microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); } if (!Ft) { if (canBeNOP) { mVUlow.isNOP = 1; } else { mVUlow.noWriteVF = 1; } }
analyzeReg2(Ft, 1); analyzeReg2(Ft, 1);
analyzeRreg(); analyzeRreg();
} }
@ -270,13 +270,13 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUanalyzeSflag(mV, int It) { microVUt(void) mVUanalyzeSflag(mV, int It) {
if (!It) { mVUinfo |= _isNOP; } if (!It) { mVUlow.isNOP = 1; }
else { else {
mVUinfo |= _swapOps; mVUinfo.swapOps = 1;
mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf /*<< mVUcount*/; } if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf /*<< mVUcount*/; }
if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } if (mVUcount >= 1) { incPC2(-2); mVUlow.useSflag = 1; incPC2(2); }
// Note: _isSflag is used for status flag optimizations. // Note: useSflag is used for status flag optimizations when a FSSET instruction is called.
// Due to stalls, it can only be set one instruction prior to the status flag read instruction // Due to stalls, it can only be set one instruction prior to the status flag read instruction
// if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior.
} }
@ -284,7 +284,7 @@ microVUt(void) mVUanalyzeSflag(mV, int It) {
} }
microVUt(void) mVUanalyzeFSSET(mV) { microVUt(void) mVUanalyzeFSSET(mV) {
mVUinfo |= _isFSSET; mVUlow.isFSSET = 1;
// mVUinfo &= ~_doStatus; // mVUinfo &= ~_doStatus;
// Note: I'm not entirely sure if the non-sticky flags // Note: I'm not entirely sure if the non-sticky flags
// should be taken from the current upper instruction // should be taken from the current upper instruction
@ -297,14 +297,14 @@ microVUt(void) mVUanalyzeFSSET(mV) {
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUanalyzeMflag(mV, int Is, int It) { microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
if (!It) { mVUinfo |= _isNOP; } if (!It) { mVUlow.isNOP = 1; }
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
mVUinfo |= _swapOps; mVUinfo.swapOps = 1;
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); } if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); }
int curPC = iPC; int curPC = iPC;
for (int i = mVUcount, j = 0; i > 0; i--, j++) { for (int i = mVUcount, j = 0; i > 0; i--, j++) {
incPC2(-2); incPC2(-2);
if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } if (sFLAG.doFlag) { mFLAG.doFlag = 1; if (j >= 3) { break; } }
} }
iPC = curPC; iPC = curPC;
} }
@ -317,7 +317,7 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUanalyzeCflag(mV, int It) { microVUt(void) mVUanalyzeCflag(mV, int It) {
mVUinfo |= _swapOps; mVUinfo.swapOps = 1;
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 8); } if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 8); }
analyzeVIreg3(It, 1); analyzeVIreg3(It, 1);
} }
@ -345,24 +345,26 @@ microVUt(void) mVUanalyzeXGkick(mV, int Fs, int xCycles) {
// Branches - Branch Opcodes // Branches - Branch Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
#define analyzeBranchVI(reg, infoVal) { \ #define analyzeBranchVI(reg, infoVar) { \
if (reg && (mVUcount > 0)) { /* Ensures branch is not first opcode in block */ \ /* First ensure branch is not first opcode in block */ \
incPC2(-2); \ if (reg && (mVUcount > 0)) { \
if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ incPC2(-2); \
mVUinfo |= _backupVI; \ /* Check if prev Op modified VI reg */ \
incPC2(2); \ if (mVUlow.writesVI && (reg == mVU->VIbackup[0])) { \
mVUinfo |= infoVal; \ mVUlow.backupVI = 1; \
} \ incPC2(2); \
else { incPC2(2); } \ infoVar = 1; \
} \ } \
else { incPC2(2); } \
} \
} }
microVUt(void) mVUanalyzeBranch1(mV, int Is) { microVUt(void) mVUanalyzeBranch1(mV, int Is) {
if (mVUregs.VI[Is] || mVUstall) { analyzeVIreg1(Is); } if (mVUregs.VI[Is] || mVUstall) { analyzeVIreg1(Is); }
else { analyzeBranchVI(Is, _memReadIs); } else { analyzeBranchVI(Is, mVUlow.memReadIs); }
} }
microVUt(void) mVUanalyzeBranch2(mV, int Is, int It) { microVUt(void) mVUanalyzeBranch2(mV, int Is, int It) {
if (mVUregs.VI[Is] || mVUregs.VI[It] || mVUstall) { analyzeVIreg1(Is); analyzeVIreg1(It); } if (mVUregs.VI[Is] || mVUregs.VI[It] || mVUstall) { analyzeVIreg1(Is); analyzeVIreg1(It); }
else { analyzeBranchVI(Is, _memReadIs); analyzeBranchVI(It, _memReadIt);} else { analyzeBranchVI(Is, mVUlow.memReadIs); analyzeBranchVI(It, mVUlow.memReadIt);}
} }

View File

@ -36,7 +36,7 @@
#define branchWarning() { \ #define branchWarning() { \
if (mVUbranch) { \ if (mVUbranch) { \
Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params vuIndex, xPC); \ Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params vuIndex, xPC); \
mVUinfo |= _isNOP; \ mVUlow.isNOP = 1; \
} \ } \
} }
@ -49,14 +49,19 @@
} \ } \
} }
#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define startLoop() { \
mVUdebug1(); \
memset(&mVUinfo, 0, sizeof(mVUinfo)); \
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \
}
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define tCycles(dest, src) { dest = aMax(dest, src); } #define tCycles(dest, src) { dest = aMax(dest, src); }
#define incP() { mVU->p = (mVU->p+1) & 1; } #define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; } #define incQ() { mVU->q = (mVU->q+1) & 1; }
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } #define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
#define doIbit() { if (curI & _Ibit_) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } #define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
//------------------------------------------------------------------ //------------------------------------------------------------------
// Helper Functions // Helper Functions
@ -114,7 +119,7 @@ microVUt(void) mVUincCycles(mV, int x) {
calcCycles(mVUregs.VI[z], x); calcCycles(mVUregs.VI[z], x);
} }
if (mVUregs.q) { if (mVUregs.q) {
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo |= _doDivFlag; } } if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
else { calcCycles(mVUregs.q, x); } else { calcCycles(mVUregs.q, x); }
if (!mVUregs.q) { incQ(); } if (!mVUregs.q) { incQ(); }
} }
@ -124,7 +129,7 @@ microVUt(void) mVUincCycles(mV, int x) {
} }
if (mVUregs.xgkick) { if (mVUregs.xgkick) {
calcCycles(mVUregs.xgkick, x); calcCycles(mVUregs.xgkick, x);
if (!mVUregs.xgkick) { mVUinfo |= _doXGKICK; } if (!mVUregs.xgkick) { mVUinfo.doXGKICK = 1; }
} }
calcCycles(mVUregs.r, x); calcCycles(mVUregs.r, x);
} }
@ -132,7 +137,8 @@ microVUt(void) mVUincCycles(mV, int x) {
microVUt(void) mVUsetCycles(mV) { microVUt(void) mVUsetCycles(mV) {
incCycles(mVUstall); incCycles(mVUstall);
if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP if (mVUregsTemp.r || mVUregsTemp.VI) mVUlow.noWriteVF = 1;
else mVUlow.isNOP = 1; // If lower Op doesn't modify anything else, then make it a NOP
} }
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].x, mVUregsTemp.VF[0].x); tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].x, mVUregsTemp.VF[0].x);
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].y, mVUregsTemp.VF[0].y); tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].y, mVUregsTemp.VF[0].y);
@ -241,20 +247,21 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) { for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) {
incPC(1); incPC(1);
mVUinfo = 0;
startLoop(); startLoop();
incCycles(1); incCycles(1);
mVUopU(mVU, 0); mVUopU(mVU, 0);
if (curI & _Ebit_) { branch = 1; } if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; }
if (curI & _MDTbit_) { branch = 4; } if (curI & _MDTbit_) { branch = 4; }
if (curI & _Ibit_) { mVUinfo |= _isNOP; } if (curI & _Ibit_) { mVUlow.isNOP = 1; mVUup.iBit = 1; }
else { incPC(-1); mVUopL(mVU, 0); incPC(1); } else { incPC(-1); mVUopL(mVU, 0); incPC(1); }
mVUsetCycles(mVU); mVUsetCycles(mVU);
if (mVU->p) { mVUinfo |= _readP; } mVUinfo.readQ = mVU->q;
if (mVU->q) { mVUinfo |= _readQ; } mVUinfo.writeQ = !mVU->q;
if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); mVUcount++; branchWarning(); break; } mVUinfo.readP = mVU->p;
mVUinfo.writeP = !mVU->p;
if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(); break; }
else if (branch == 1) { branch = 2; } else if (branch == 1) { branch = 2; }
if (mVUbranch) { mVUsetFlagInfo(mVU); branchEbit(); branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } if (mVUbranch) { mVUsetFlagInfo(mVU); branchEbit(); branch = 3; mVUbranch = 0; }
incPC(1); incPC(1);
mVUcount++; mVUcount++;
} }
@ -270,13 +277,13 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVUbranch = 0; mVUbranch = 0;
int x; int x;
for (x = 0; x < (vuIndex ? (0x3fff/8) : (0xfff/8)); x++) { for (x = 0; x < (vuIndex ? (0x3fff/8) : (0xfff/8)); x++) {
if (isEOB) { x = 0xffff; } if (mVUinfo.isEOB) { x = 0xffff; }
if (isNOP) { incPC(1); doUpperOp(); doIbit(); } if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(); }
else if (!swapOps) { incPC(1); doUpperOp(); doLowerOp(); } else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
if (doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
if (!isBdelay) { incPC(1); } if (!mVUinfo.isBdelay) { incPC(1); }
else { else {
microBlock* bBlock = NULL; microBlock* bBlock = NULL;
u32* ajmp = 0; u32* ajmp = 0;
@ -365,16 +372,16 @@ eBitTemination:
int lStatus = findFlagInst(xStatus, 0x7fffffff); int lStatus = findFlagInst(xStatus, 0x7fffffff);
int lMac = findFlagInst(xMac, 0x7fffffff); int lMac = findFlagInst(xMac, 0x7fffffff);
int lClip = findFlagInst(xClip, 0x7fffffff); int lClip = findFlagInst(xClip, 0x7fffffff);
mVUinfo = 0; memset(&mVUinfo, 0, sizeof(mVUinfo));
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0) incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100; mVUcycles -= 100;
if (doDivFlag) { if (mVUinfo.doDivFlag) {
int flagReg; int flagReg;
getFlagReg(flagReg, lStatus); getFlagReg(flagReg, lStatus);
AND32ItoR (flagReg, 0x0fcf); AND32ItoR (flagReg, 0x0fcf);
OR32MtoR (flagReg, (uptr)&mVU->divFlag); OR32MtoR (flagReg, (uptr)&mVU->divFlag);
} }
if (doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
// Do E-bit end stuff here // Do E-bit end stuff here
mVUsetupRange(mVU, xPC - 8); mVUsetupRange(mVU, xPC - 8);

View File

@ -21,9 +21,9 @@
// Sets FDIV Flags at the proper time // Sets FDIV Flags at the proper time
microVUt(void) mVUdivSet(mV) { microVUt(void) mVUdivSet(mV) {
int flagReg1, flagReg2; int flagReg1, flagReg2;
if (doDivFlag) { if (mVUinfo.doDivFlag) {
getFlagReg(flagReg1, fsInstance); getFlagReg(flagReg1, sFLAG.write);
if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV32RtoR(flagReg1, flagReg2); } if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); }
AND32ItoR(flagReg1, 0x0fcf); AND32ItoR(flagReg1, 0x0fcf);
OR32MtoR (flagReg1, (uptr)&mVU->divFlag); OR32MtoR (flagReg1, (uptr)&mVU->divFlag);
} }
@ -34,19 +34,19 @@ microVUt(void) mVUstatusFlagOp(mV) {
int curPC = iPC; int curPC = iPC;
int i = mVUcount; int i = mVUcount;
bool runLoop = 1; bool runLoop = 1;
if (doStatus) { mVUinfo |= _isSflag; } if (sFLAG.doFlag) { mVUlow.useSflag = 1; }
else { else {
for (; i > 0; i--) { for (; i > 0; i--) {
incPC2(-2); incPC2(-2);
if (isSflag) { runLoop = 0; break; } if (mVUlow.useSflag) { runLoop = 0; break; }
if (doStatus) { mVUinfo |= _isSflag; break; } if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; }
} }
} }
if (runLoop) { if (runLoop) {
for (; i > 0; i--) { for (; i > 0; i--) {
incPC2(-2); incPC2(-2);
if (isSflag) break; if (mVUlow.useSflag) break;
mVUinfo &= ~_doStatus; sFLAG.doFlag = 0;
} }
} }
iPC = curPC; iPC = curPC;
@ -69,7 +69,7 @@ void sortFlag(int* fFlag, int* bFlag, int cycles) {
} }
} }
#define sFlagCond ((doStatus && !mVUsFlagHack) || isFSSET || doDivFlag) #define sFlagCond ((sFLAG.doFlag && !mVUsFlagHack) || mVUlow.isFSSET || mVUinfo.doDivFlag)
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
@ -77,9 +77,9 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
int endPC = iPC; int endPC = iPC;
u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking
// Ensure last ~4+ instructions update mac flags // Ensure last ~4+ instructions update mac flags (if next block's first 4 instructions will read them)
for (int i = mVUcount; i > 0; i--, aCount++) { for (int i = mVUcount; i > 0; i--, aCount++) {
if (doStatus) { if (__Mac) { mVUinfo |= _doMac; } if (aCount >= 4) { break; } } if (sFLAG.doFlag) { if (__Mac) { mFLAG.doFlag = 1; } if (aCount >= 4) { break; } }
incPC2(-2); incPC2(-2);
} }
@ -114,7 +114,7 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
u32 xCount = mVUcount; // Backup count u32 xCount = mVUcount; // Backup count
iPC = mVUstartPC; iPC = mVUstartPC;
for (mVUcount = 0; mVUcount < xCount; mVUcount++) { for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
if (isFSSET) { if (mVUlow.isFSSET) {
if (__Status) { // Don't Optimize out on the last ~4+ instructions if (__Status) { // Don't Optimize out on the last ~4+ instructions
if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); } if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); }
} }
@ -122,17 +122,21 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
} }
cycles += mVUstall; cycles += mVUstall;
mVUinfo |= findFlagInst(xStatus, cycles) << 18; // _fvsInstance sFLAG.read = findFlagInst(xStatus, cycles);
mVUinfo |= findFlagInst(xMac, cycles) << 16; // _fvmInstance mFLAG.read = findFlagInst(xMac, cycles);
mVUinfo |= findFlagInst(xClip, cycles) << 20; // _fvcInstance cFLAG.read = findFlagInst(xClip, cycles);
sFLAG.write = xS;
mFLAG.write = xM;
cFLAG.write = xC;
mVUinfo |= xS << 12; // _fsInstance sFLAG.lastWrite = (xS-1) & 3;
mVUinfo |= xM << 10; // _fmInstance mFLAG.lastWrite = (xM-1) & 3;
mVUinfo |= xC << 14; // _fcInstance cFLAG.lastWrite = (xC-1) & 3;
if (sFlagCond) { xStatus[xS] = cycles + 4; xS = (xS+1) & 3; } if (sFlagCond) { xStatus[xS] = cycles + 4; xS = (xS+1) & 3; }
if (doMac) { xMac [xM] = cycles + 4; xM = (xM+1) & 3; } if (mFLAG.doFlag) { xMac [xM] = cycles + 4; xM = (xM+1) & 3; }
if (doClip) { xClip [xC] = cycles + 4; xC = (xC+1) & 3; } if (cFLAG.doFlag) { xClip [xC] = cycles + 4; xC = (xC+1) & 3; }
cycles++; cycles++;
incPC2(2); incPC2(2);

View File

@ -65,17 +65,58 @@ struct microBlock {
u8* x86ptrStart; // Start of code u8* x86ptrStart; // Start of code
}; };
// Per-instruction info about the upper op of a VU instruction pair.
// Stored as the uOp member of microOp.
// NOTE(review): the compile loop sets these through mVUup.eBit / mVUup.iBit when
// the upper word has _Ebit_ / _Ibit_ set; mVUup is presumably a macro for this struct - confirm.
struct microUpperOp {
bool eBit; // Has E-bit set
bool iBit; // Has I-bit set
};
// Per-instruction info about the lower op of a VU instruction pair.
// Stored as the lOp member of microOp.
// NOTE(review): other files access these fields through mVUlow.*, presumably a macro
// for the current instruction's lOp - confirm.
struct microLowerOp {
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
bool useSflag; // This instruction uses/reads Sflag
u32 branch; // Branch Type (0 = Not a Branch, 1 = B, 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
bool memReadIt; // Read It (VI reg) from memory (used by branches)
bool writesVI; // Current Instruction writes to VI (used by branches; note that flag-modifying opcodes shouldn't set this)
};
// Flag-instance bookkeeping for a single flag type on a single instruction.
// microOp holds one of these each for the status, mac and clip flags.
// NOTE(review): instance indices look like they are in the range 0..3, since
// mVUsetFlags masks them with '& 3' - confirm.
struct microFlagInst {
bool doFlag; // Update Flag on this Instruction
u8 write; // Points to the instance that should be written to (s-stage write)
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
};
// Analysis info for one VU instruction (upper op + lower op pair).
// microIR keeps an array of these (info[pSize/2]) for the instructions in the current block.
// NOTE(review): the compile loop zeroes the current entry with memset at the start of
// each instruction (startLoop), so any field added here starts out as 0 - confirm.
struct microOp {
u8 stall; // Info on how much current instruction stalled
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
bool isBdelay; // Cur Instruction in Branch Delay slot
bool swapOps; // Run Lower Instruction before Upper Instruction
bool doXGKICK; // Do XGKICK transfer on this instruction
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
int readQ; // Q instance for reading
int writeQ; // Q instance for writing
int readP; // P instance for reading
int writeP; // P instance for writing
microFlagInst sFlag; // Status Flag Instance Info
microFlagInst mFlag; // Mac Flag Instance Info
microFlagInst cFlag; // Clip Flag Instance Info
microUpperOp uOp; // Upper Op Info
microLowerOp lOp; // Lower Op Info
};
template<u32 pSize> template<u32 pSize>
struct microAllocInfo { struct microIR {
microBlock* pBlock; // Pointer to a block in mVUblocks microBlock* pBlock; // Pointer to a block in mVUblocks
microBlock block; // Block/Pipeline info microBlock block; // Block/Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR microOp info[pSize/2]; // Info for Instructions in current block
u8 branch;
u32 cycles; // Cycles for current block u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block u32 startPC; // Start PC for Cur Block
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
u32 info[pSize/2]; // Info for Instructions in current block
u8 stall[pSize/2]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
}; };

View File

@ -46,8 +46,7 @@ microVUx(void) __mVUdumpProgram(int progIndex) {
int bBranch = mVUbranch; int bBranch = mVUbranch;
int bCode = mVU->code; int bCode = mVU->code;
int bPC = iPC; int bPC = iPC;
vuIndex = (mVU == &microVU1) ? 1 : 0; mVUbranch = 0;
mVUbranch = 0;
sprintf(str, "%s\\microVU%d prog - %02d.html", LOGS_DIR, vuIndex, progIndex); sprintf(str, "%s\\microVU%d prog - %02d.html", LOGS_DIR, vuIndex, progIndex);
mVU->logFile = fopen(str, "w"); mVU->logFile = fopen(str, "w");

View File

@ -71,9 +71,9 @@ mVUop(mVU_DIV) {
if (CHECK_VU_OVERFLOW) mVUclamp1(xmmFs, xmmFt, 8); if (CHECK_VU_OVERFLOW) mVUclamp1(xmmFs, xmmFt, 8);
x86SetJ8(djmp); x86SetJ8(djmp);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
} }
pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); } pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
} }
@ -89,9 +89,9 @@ mVUop(mVU_SQRT) {
if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive) if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt); SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
} }
pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); } pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
} }
@ -127,9 +127,9 @@ mVUop(mVU_RSQRT) {
if (CHECK_VU_OVERFLOW) mVUclamp1(xmmFs, xmmFt, 8); if (CHECK_VU_OVERFLOW) mVUclamp1(xmmFs, xmmFt, 8);
x86SetJ8(djmp); x86SetJ8(djmp);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
} }
pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); } pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
} }
@ -162,14 +162,14 @@ microVUt(void) mVU_EATAN_(mV) {
EATANhelper(mVU_T8); EATANhelper(mVU_T8);
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_Pi4); SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_Pi4);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6);
} }
mVUop(mVU_EATAN) { mVUop(mVU_EATAN) {
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); } pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
@ -186,7 +186,7 @@ mVUop(mVU_EATANxy) {
pass2 { pass2 {
getReg6(xmmFt, _Fs_); getReg6(xmmFt, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); // y-x, not y-1? >< SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); // y-x, not y-1? ><
@ -203,7 +203,7 @@ mVUop(mVU_EATANxz) {
pass2 { pass2 {
getReg6(xmmFt, _Fs_); getReg6(xmmFt, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt);
@ -226,7 +226,7 @@ mVUop(mVU_EEXP) {
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); } pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1); SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1);
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
@ -249,7 +249,7 @@ mVUop(mVU_EEXP) {
SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("EEXP P"); } pass3 { mVUlog("EEXP P"); }
} }
@ -274,10 +274,10 @@ mVUop(mVU_ELENG) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
pass2 { pass2 {
getReg6(xmmFs, _Fs_); getReg6(xmmFs, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(); mVU_sumXYZ();
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ELENG P"); } pass3 { mVUlog("ELENG P"); }
} }
@ -286,12 +286,12 @@ mVUop(mVU_ERCPR) {
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ERCPR P"); } pass3 { mVUlog("ERCPR P"); }
} }
@ -300,13 +300,13 @@ mVUop(mVU_ERLENG) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
pass2 { pass2 {
getReg6(xmmFs, _Fs_); getReg6(xmmFs, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(); mVU_sumXYZ();
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ERLENG P"); } pass3 { mVUlog("ERLENG P"); }
} }
@ -315,13 +315,13 @@ mVUop(mVU_ERSADD) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
pass2 { pass2 {
getReg6(xmmFs, _Fs_); getReg6(xmmFs, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(); mVU_sumXYZ();
//SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad? //SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad?
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ERSADD P"); } pass3 { mVUlog("ERSADD P"); }
} }
@ -330,12 +330,12 @@ mVUop(mVU_ERSQRT) {
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); } pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ERSQRT P"); } pass3 { mVUlog("ERSQRT P"); }
} }
@ -344,9 +344,9 @@ mVUop(mVU_ESADD) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
pass2 { pass2 {
getReg6(xmmFs, _Fs_); getReg6(xmmFs, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(); mVU_sumXYZ();
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ESADD P"); } pass3 { mVUlog("ESADD P"); }
} }
@ -362,7 +362,7 @@ mVUop(mVU_ESIN) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
//SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant? //SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant?
SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmFs); SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmFs);
@ -379,7 +379,7 @@ mVUop(mVU_ESIN) {
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt);
SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5); SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5);
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ESIN P"); } pass3 { mVUlog("ESIN P"); }
} }
@ -388,9 +388,9 @@ mVUop(mVU_ESQRT) {
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
pass2 { pass2 {
getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFs, _Fs_, _Fsf_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ESQRT P"); } pass3 { mVUlog("ESQRT P"); }
} }
@ -399,13 +399,13 @@ mVUop(mVU_ESUM) {
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); } pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
pass2 { pass2 {
getReg6(xmmFs, _Fs_); getReg6(xmmFs, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x1b); SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x1b);
SSE_ADDPS_XMM_to_XMM(xmmFs, xmmFt); SSE_ADDPS_XMM_to_XMM(xmmFs, xmmFt);
SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x01); SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x01);
SSE_ADDSS_XMM_to_XMM(xmmFs, xmmFt); SSE_ADDSS_XMM_to_XMM(xmmFs, xmmFt);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
} }
pass3 { mVUlog("ESUM P"); } pass3 { mVUlog("ESUM P"); }
} }
@ -417,7 +417,7 @@ mVUop(mVU_ESUM) {
mVUop(mVU_FCAND) { mVUop(mVU_FCAND) {
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 { pass2 {
mVUallocCFLAGa(mVU, gprT1, fvcInstance); mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
AND32ItoR(gprT1, _Imm24_); AND32ItoR(gprT1, _Imm24_);
ADD32ItoR(gprT1, 0xffffff); ADD32ItoR(gprT1, 0xffffff);
SHR32ItoR(gprT1, 24); SHR32ItoR(gprT1, 24);
@ -430,7 +430,7 @@ mVUop(mVU_FCAND) {
mVUop(mVU_FCEQ) { mVUop(mVU_FCEQ) {
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 { pass2 {
mVUallocCFLAGa(mVU, gprT1, fvcInstance); mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
XOR32ItoR(gprT1, _Imm24_); XOR32ItoR(gprT1, _Imm24_);
SUB32ItoR(gprT1, 1); SUB32ItoR(gprT1, 1);
SHR32ItoR(gprT1, 31); SHR32ItoR(gprT1, 31);
@ -443,7 +443,7 @@ mVUop(mVU_FCEQ) {
mVUop(mVU_FCGET) { mVUop(mVU_FCGET) {
pass1 { mVUanalyzeCflag(mVU, _It_); } pass1 { mVUanalyzeCflag(mVU, _It_); }
pass2 { pass2 {
mVUallocCFLAGa(mVU, gprT1, fvcInstance); mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
AND32ItoR(gprT1, 0xfff); AND32ItoR(gprT1, 0xfff);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
} }
@ -454,7 +454,7 @@ mVUop(mVU_FCGET) {
mVUop(mVU_FCOR) { mVUop(mVU_FCOR) {
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 { pass2 {
mVUallocCFLAGa(mVU, gprT1, fvcInstance); mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
OR32ItoR(gprT1, _Imm24_); OR32ItoR(gprT1, _Imm24_);
ADD32ItoR(gprT1, 1); // If 24 1's will make 25th bit 1, else 0 ADD32ItoR(gprT1, 1); // If 24 1's will make 25th bit 1, else 0
SHR32ItoR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg) SHR32ItoR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
@ -465,10 +465,10 @@ mVUop(mVU_FCOR) {
} }
mVUop(mVU_FCSET) { mVUop(mVU_FCSET) {
pass1 { mVUinfo |= _doClip; } pass1 { cFLAG.doFlag = 1; }
pass2 { pass2 {
MOV32ItoR(gprT1, _Imm24_); MOV32ItoR(gprT1, _Imm24_);
mVUallocCFLAGb(mVU, gprT1, fcInstance); mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
} }
pass3 { mVUlog("FCSET $%x", _Imm24_); } pass3 { mVUlog("FCSET $%x", _Imm24_); }
} }
@ -480,7 +480,7 @@ mVUop(mVU_FCSET) {
mVUop(mVU_FMAND) { mVUop(mVU_FMAND) {
pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
pass2 { pass2 {
mVUallocMFLAGa(mVU, gprT1, fvmInstance); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
AND16RtoR(gprT1, gprT2); AND16RtoR(gprT1, gprT2);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
@ -492,7 +492,7 @@ mVUop(mVU_FMAND) {
mVUop(mVU_FMEQ) { mVUop(mVU_FMEQ) {
pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
pass2 { pass2 {
mVUallocMFLAGa(mVU, gprT1, fvmInstance); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
XOR32RtoR(gprT1, gprT2); XOR32RtoR(gprT1, gprT2);
SUB32ItoR(gprT1, 1); SUB32ItoR(gprT1, 1);
@ -506,7 +506,7 @@ mVUop(mVU_FMEQ) {
mVUop(mVU_FMOR) { mVUop(mVU_FMOR) {
pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
pass2 { pass2 {
mVUallocMFLAGa(mVU, gprT1, fvmInstance); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
OR16RtoR(gprT1, gprT2); OR16RtoR(gprT1, gprT2);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
@ -522,7 +522,7 @@ mVUop(mVU_FMOR) {
mVUop(mVU_FSAND) { mVUop(mVU_FSAND) {
pass1 { mVUanalyzeSflag(mVU, _It_); } pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 { pass2 {
mVUallocSFLAGa(gprT1, fvsInstance); mVUallocSFLAGa(gprT1, sFLAG.read);
AND16ItoR(gprT1, _Imm12_); AND16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
} }
@ -533,7 +533,7 @@ mVUop(mVU_FSAND) {
mVUop(mVU_FSEQ) { mVUop(mVU_FSEQ) {
pass1 { mVUanalyzeSflag(mVU, _It_); } pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 { pass2 {
mVUallocSFLAGa(gprT1, fvsInstance); mVUallocSFLAGa(gprT1, sFLAG.read);
XOR16ItoR(gprT1, _Imm12_); XOR16ItoR(gprT1, _Imm12_);
SUB16ItoR(gprT1, 1); SUB16ItoR(gprT1, 1);
SHR16ItoR(gprT1, 15); SHR16ItoR(gprT1, 15);
@ -546,7 +546,7 @@ mVUop(mVU_FSEQ) {
mVUop(mVU_FSOR) { mVUop(mVU_FSOR) {
pass1 { mVUanalyzeSflag(mVU, _It_); } pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 { pass2 {
mVUallocSFLAGa(gprT1, fvsInstance); mVUallocSFLAGa(gprT1, sFLAG.read);
OR16ItoR(gprT1, _Imm12_); OR16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
} }
@ -558,8 +558,8 @@ mVUop(mVU_FSSET) {
pass1 { mVUanalyzeFSSET(mVU); } pass1 { mVUanalyzeFSSET(mVU); }
pass2 { pass2 {
int flagReg1, flagReg2; int flagReg1, flagReg2;
getFlagReg(flagReg1, fsInstance); getFlagReg(flagReg1, sFLAG.write);
if (!(doStatus||doDivFlag)) { getFlagReg(flagReg2, fpsInstance); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction
AND32ItoR(flagReg1, 0x03f); AND32ItoR(flagReg1, 0x03f);
OR32ItoR (flagReg1, (_Imm12_ & 0xfc0)); OR32ItoR (flagReg1, (_Imm12_ & 0xfc0));
} }
@ -664,7 +664,7 @@ mVUop(mVU_ISUBIU) {
//------------------------------------------------------------------ //------------------------------------------------------------------
mVUop(mVU_MFIR) { mVUop(mVU_MFIR) {
pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Is_); analyzeReg2(_Ft_, 1); } pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_); analyzeReg2(_Ft_, 1); }
pass2 { pass2 {
mVUallocVIa(mVU, gprT1, _Is_); mVUallocVIa(mVU, gprT1, _Is_);
MOVSX32R16toR(gprT1, gprT1); MOVSX32R16toR(gprT1, gprT1);
@ -704,7 +704,7 @@ mVUop(mVU_MR32) {
} }
mVUop(mVU_MTIR) { mVUop(mVU_MTIR) {
pass1 { if (!_It_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_It_, 1); } pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_It_, 1); }
pass2 { pass2 {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
@ -717,7 +717,7 @@ mVUop(mVU_MTIR) {
//------------------------------------------------------------------ //------------------------------------------------------------------
mVUop(mVU_ILW) { mVUop(mVU_ILW) {
pass1 { if (!_It_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Is_); analyzeVIreg2(_It_, 4); } pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_); analyzeVIreg2(_It_, 4); }
pass2 { pass2 {
if (!_Is_) { if (!_Is_) {
MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS); MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS);
@ -735,7 +735,7 @@ mVUop(mVU_ILW) {
} }
mVUop(mVU_ILWR) { mVUop(mVU_ILWR) {
pass1 { if (!_It_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Is_); analyzeVIreg2(_It_, 4); } pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_); analyzeVIreg2(_It_, 4); }
pass2 { pass2 {
if (!_Is_) { if (!_Is_) {
MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS);
@ -828,7 +828,7 @@ mVUop(mVU_LQ) {
mVUop(mVU_LQD) { mVUop(mVU_LQD) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); } pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
pass2 { pass2 {
if (!_Is_ && !noWriteVF) { if (!_Is_ && !mVUlow.noWriteVF) {
mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
} }
@ -836,7 +836,7 @@ mVUop(mVU_LQD) {
mVUallocVIa(mVU, gprT1, _Is_); mVUallocVIa(mVU, gprT1, _Is_);
SUB16ItoR(gprT1, 1); SUB16ItoR(gprT1, 1);
mVUallocVIb(mVU, gprT1, _Is_); mVUallocVIb(mVU, gprT1, _Is_);
if (!noWriteVF) { if (!mVUlow.noWriteVF) {
mVUaddrFix(mVU, gprT1); mVUaddrFix(mVU, gprT1);
mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
@ -849,13 +849,13 @@ mVUop(mVU_LQD) {
mVUop(mVU_LQI) { mVUop(mVU_LQI) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); } pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
pass2 { pass2 {
if (!_Is_ && !noWriteVF) { if (!_Is_ && !mVUlow.noWriteVF) {
mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
} }
else { else {
mVUallocVIa(mVU, (!noWriteVF) ? gprT1 : gprT2, _Is_); mVUallocVIa(mVU, (!mVUlow.noWriteVF) ? gprT1 : gprT2, _Is_);
if (!noWriteVF) { if (!mVUlow.noWriteVF) {
MOV32RtoR(gprT2, gprT1); MOV32RtoR(gprT2, gprT1);
mVUaddrFix(mVU, gprT1); mVUaddrFix(mVU, gprT1);
mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
@ -948,7 +948,7 @@ mVUop(mVU_RINIT) {
} }
microVUt(void) mVU_RGET_(mV, int Rreg) { microVUt(void) mVU_RGET_(mV, int Rreg) {
if (!noWriteVF) { if (!mVUlow.noWriteVF) {
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], Rreg); if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], Rreg);
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], Rreg); if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], Rreg);
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], Rreg); if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], Rreg);
@ -1017,7 +1017,7 @@ mVUop(mVU_WAITQ) {
//------------------------------------------------------------------ //------------------------------------------------------------------
mVUop(mVU_XTOP) { mVUop(mVU_XTOP) {
pass1 { if (!_It_) { mVUinfo |= _isNOP; } analyzeVIreg2(_It_, 1); } pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeVIreg2(_It_, 1); }
pass2 { pass2 {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top); MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
@ -1026,7 +1026,7 @@ mVUop(mVU_XTOP) {
} }
mVUop(mVU_XITOP) { mVUop(mVU_XITOP) {
pass1 { if (!_It_) { mVUinfo |= _isNOP; } analyzeVIreg2(_It_, 1); } pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeVIreg2(_It_, 1); }
pass2 { pass2 {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop); MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
@ -1073,8 +1073,8 @@ microVUt(void) mVU_XGKICK_DELAY(mV, bool memVI) {
mVUop(mVU_XGKICK) { mVUop(mVU_XGKICK) {
pass1 { mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES); } pass1 { mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES); }
pass2 { pass2 {
if (!mVU_XGKICK_CYCLES) { mVU_XGKICK_DELAY(mVU, 0); return; } if (!mVU_XGKICK_CYCLES) { mVU_XGKICK_DELAY(mVU, 0); return; }
else if (doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); mVUinfo &= ~_doXGKICK; } else if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); mVUinfo.doXGKICK = 0; }
mVUallocVIa(mVU, gprT1, _Is_); mVUallocVIa(mVU, gprT1, _Is_);
MOV32RtoM((uptr)&mVU->VIxgkick, gprT1); MOV32RtoM((uptr)&mVU->VIxgkick, gprT1);
} }
@ -1086,7 +1086,7 @@ mVUop(mVU_XGKICK) {
//------------------------------------------------------------------ //------------------------------------------------------------------
#define setBranchA(x, _x_) { \ #define setBranchA(x, _x_) { \
pass1 { if (_Imm11_ == 1 && !_x_) { mVUinfo |= _isNOP; return; } mVUbranch = x; } \ pass1 { if (_Imm11_ == 1 && !_x_) { mVUlow.isNOP = 1; return; } mVUbranch = x; } \
pass2 { if (_Imm11_ == 1 && !_x_) { return; } mVUbranch = x; } \ pass2 { if (_Imm11_ == 1 && !_x_) { return; } mVUbranch = x; } \
pass3 { mVUbranch = x; } \ pass3 { mVUbranch = x; } \
pass4 { if (_Imm11_ == 1 && !_x_) { return; } mVUbranch = x; } \ pass4 { if (_Imm11_ == 1 && !_x_) { return; } mVUbranch = x; } \
@ -1111,9 +1111,9 @@ mVUop(mVU_IBEQ) {
setBranchA(3, 0); setBranchA(3, 0);
pass1 { mVUanalyzeBranch2(mVU, _Is_, _It_); } pass1 { mVUanalyzeBranch2(mVU, _Is_, _It_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }
@ -1124,7 +1124,7 @@ mVUop(mVU_IBGEZ) {
setBranchA(4, 0); setBranchA(4, 0);
pass1 { mVUanalyzeBranch1(mVU, _Is_); } pass1 { mVUanalyzeBranch1(mVU, _Is_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }
@ -1135,7 +1135,7 @@ mVUop(mVU_IBGTZ) {
setBranchA(5, 0); setBranchA(5, 0);
pass1 { mVUanalyzeBranch1(mVU, _Is_); } pass1 { mVUanalyzeBranch1(mVU, _Is_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }
@ -1146,7 +1146,7 @@ mVUop(mVU_IBLEZ) {
setBranchA(6, 0); setBranchA(6, 0);
pass1 { mVUanalyzeBranch1(mVU, _Is_); } pass1 { mVUanalyzeBranch1(mVU, _Is_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }
@ -1157,7 +1157,7 @@ mVUop(mVU_IBLTZ) {
setBranchA(7, 0); setBranchA(7, 0);
pass1 { mVUanalyzeBranch1(mVU, _Is_); } pass1 { mVUanalyzeBranch1(mVU, _Is_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }
@ -1168,9 +1168,9 @@ mVUop(mVU_IBNE) {
setBranchA(8, 0); setBranchA(8, 0);
pass1 { mVUanalyzeBranch2(mVU, _Is_, _It_); } pass1 { mVUanalyzeBranch2(mVU, _Is_, _It_); }
pass2 { pass2 {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa(mVU, gprT1, _Is_); else mVUallocVIa(mVU, gprT1, _Is_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)&mVU->branch, gprT1); MOV32RtoM((uptr)&mVU->branch, gprT1);
} }

View File

@ -172,8 +172,15 @@ declareAllVariables
#define mVUregsTemp mVUallocInfo.regsTemp #define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC #define iPC mVUallocInfo.curPC
#define mVUsFlagHack mVUallocInfo.sFlagHack #define mVUsFlagHack mVUallocInfo.sFlagHack
#define mVUinfo mVUallocInfo.info[iPC / 2] #define mVUinfo mVUallocInfo.info[iPC / 2]
#define mVUstall mVUallocInfo.stall[iPC / 2] #define mVUstall mVUinfo.stall
#define mVUup mVUinfo.uOp
#define mVUlow mVUinfo.lOp
#define sFLAG mVUinfo.sFlag
#define mFLAG mVUinfo.mFlag
#define cFLAG mVUinfo.cFlag
#define mVUstartPC mVUallocInfo.startPC #define mVUstartPC mVUallocInfo.startPC
#define mVUflagInfo mVUregs.needExactMatch #define mVUflagInfo mVUregs.needExactMatch
#define mVUflagHack (mVUcurProg.sFlagHack) #define mVUflagHack (mVUcurProg.sFlagHack)
@ -194,70 +201,6 @@ declareAllVariables
#define __Mac (mVUflagInfo & (0xf<<4)) #define __Mac (mVUflagInfo & (0xf<<4))
#define __Clip (mVUflagInfo & (0xf<<8)) #define __Clip (mVUflagInfo & (0xf<<8))
// Pass 1 uses these to set mVUinfo
#define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch
#define _isEOB (1<<2) // End of Block
#define _isBdelay (1<<3) // Cur Instruction in Branch Delay slot
#define _isSflag (1<<4) // Cur Instruction uses status flag
#define _doXGKICK (1<<5) // Do XGKICK transfer on this instruction
#define _writeQ (1<<6)
#define _readQ (1<<6) // same as writeQ
#define _writeP (1<<7)
#define _readP (1<<7) // same as writeP
#define _doFlags (3<<8)
#define _doMac (1<<8)
#define _doStatus (1<<9)
#define _fmInstance (3<<10) // Mac Write Instance
#define _fsInstance (3<<12) // Status Write Instance
#define _fcInstance (3<<14) // Clip Write Instance
#define _fpsInstance (3<<12) // Prev.S. Write Instance
#define _fpcInstance (3<<14) // Prev.C. Write Instance
#define _fvmInstance (3<<16) // Mac Read Instance (at T-stage for lower instruction)
#define _fvsInstance (3<<18) // Status Read Instance (at T-stage for lower instruction)
#define _fvcInstance (3<<20) // Clip Read Instance (at T-stage for lower instruction)
#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches)
#define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches)
#define _writesVI (1<<25) // Current Instruction writes to VI (used by branches; note that flag-modifying opcodes shouldn't set this)
#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction
#define _isFSSET (1<<27) // Cur Instruction is FSSET
#define _doDivFlag (1<<28) // Transfer Div flag to Status Flag
#define _doClip (1<<29)
#define _noWriteVF (1<<30) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
// Pass 2 uses these to read mVUinfo
#define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1))
#define isEOB (mVUinfo & (1<<2))
#define isBdelay (mVUinfo & (1<<3))
#define isSflag (mVUinfo & (1<<4))
#define doXGKICK (mVUinfo & (1<<5))
#define readQ ((mVUinfo >> 6) & 1) // same as writeQ
#define readP ((mVUinfo >> 7) & 1) // same as writeP
#define writeQ (((mVUinfo >> 6) + 1) & 1)
#define writeP (((mVUinfo >> 7) + 1) & 1)
#define doFlags (mVUinfo & (3<<8))
#define doMac (mVUinfo & (1<<8))
#define doStatus (mVUinfo & (1<<9))
#define fmInstance ((mVUinfo >> 10) & 3)
#define fsInstance ((mVUinfo >> 12) & 3)
#define fpsInstance ((((mVUinfo>>12) & 3) - 1) & 0x3)
#define fcInstance ((mVUinfo >> 14) & 3)
#define fpcInstance ((((mVUinfo>>14) & 3) - 1) & 0x3)
#define fvmInstance ((mVUinfo >> 16) & 3)
#define fvsInstance ((mVUinfo >> 18) & 3)
#define fvcInstance ((mVUinfo >> 20) & 3)
#define backupVI (mVUinfo & (1<<22))
#define memReadIs (mVUinfo & (1<<23))
#define memReadIt (mVUinfo & (1<<24))
#define writesVI (mVUinfo & (1<<25))
#define swapOps (mVUinfo & (1<<26))
#define isFSSET (mVUinfo & (1<<27))
#define doDivFlag (mVUinfo & (1<<28))
#define doClip (mVUinfo & (1<<29))
#define noWriteVF (mVUinfo & (1<<30))
// Pass 3 Helper Macros // Pass 3 Helper Macros
#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x"))) #define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x")))
#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x"))) #define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x")))

View File

@ -305,8 +305,8 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) {
mVU->code = mVU->prog.prog[progIndex].data[i]; mVU->code = mVU->prog.prog[progIndex].data[i];
mVUopL(mVU, 3); mVUopL(mVU, 3);
} }
mVUflagInfo = bFlagInfo; mVUflagInfo = bFlagInfo;
mVU->code = bCode; mVU->code = bCode;
mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack; mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack;
} }
} }

View File

@ -22,7 +22,7 @@
// mVUupdateFlags() - Updates status/mac flags // mVUupdateFlags() - Updates status/mac flags
//------------------------------------------------------------------ //------------------------------------------------------------------
#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (doMac ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) #define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (mFLAG.doFlag ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W])))
#define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0) #define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0)
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { SHL32ItoR(gprReg, ADD_XYZW); } } #define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { SHL32ItoR(gprReg, ADD_XYZW); } }
@ -32,14 +32,14 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
static u8 *pjmp, *pjmp2; static u8 *pjmp, *pjmp2;
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("doStatus = %d; doMac = %d\n", doStatus>>9, doMac>>8); //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
if (mVUsFlagHack) { mVUinfo &= ~_doStatus; } if (mVUsFlagHack) { sFLAG.doFlag = 0; }
if (!doFlags) return; if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
if (!doMac || (_XYZW_SS && modXYZW)) { regT1 = reg; } if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; }
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw
if (doStatus) { if (sFLAG.doFlag) {
getFlagReg(sReg, fsInstance); // Set sReg to valid GPR by Cur Flag Instance getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance
mVUallocSFLAGa(sReg, fpsInstance); // Get Prev Status Flag mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
} }
@ -54,25 +54,25 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
if (doStatus) pjmp = JZ8(0); // Skip if none are if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (doMac) SHL32ItoR(mReg, 4 + ADD_XYZW); if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW);
if (doStatus) OR32ItoR(sReg, 0x82); // SS, S flags if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags
if (_XYZW_SS && doStatus) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
if (doStatus) x86SetJ8(pjmp); if (sFLAG.doFlag) x86SetJ8(pjmp);
//-------------------------Check for Zero flags------------------------------ //-------------------------Check for Zero flags------------------------------
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
if (doStatus) pjmp = JZ8(0); // Skip if none are if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (doMac) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); } if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
if (doStatus) { OR32ItoR(sReg, 0x41); } // ZS, Z flags if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
if (doStatus) x86SetJ8(pjmp); if (sFLAG.doFlag) x86SetJ8(pjmp);
//-------------------------Write back flags------------------------------ //-------------------------Write back flags------------------------------
if (_XYZW_SS && doStatus) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
if (doMac) mVUallocMFLAGb(mVU, mReg, fmInstance); // Set Mac Flag if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -438,9 +438,9 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
} }
// FMAC27~29 - MAX/MINI FMAC Opcodes // FMAC27~29 - MAX/MINI FMAC Opcodes
// Each wrapper expands the corresponding base macro (mVU_FMAC1, mVU_FMAC6, mVU_FMAC3)
// with the same (operation, OPname) arguments and then, inside pass1, disables the
// status-flag update for the instruction:
//   old form: mVUinfo &= ~_doStatus;
//   new form: sFLAG.doFlag = 0;
#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { mVUinfo &= ~_doStatus; } } #define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { mVUinfo &= ~_doStatus; } } #define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { mVUinfo &= ~_doStatus; } } #define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
//------------------------------------------------------------------ //------------------------------------------------------------------
// Micro VU Micromode Upper instructions // Micro VU Micromode Upper instructions
@ -587,7 +587,7 @@ mVUop(mVU_CLIP) {
pass2 { pass2 {
int Fs, Ft; int Fs, Ft;
mVUallocFMAC17a(mVU, Fs, Ft); mVUallocFMAC17a(mVU, Fs, Ft);
mVUallocCFLAGa(mVU, gprT1, fpcInstance); mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
SHL32ItoR(gprT1, 6); SHL32ItoR(gprT1, 6);
SSE_ANDPS_M128_to_XMM(Ft, (uptr)mVU_absclip); SSE_ANDPS_M128_to_XMM(Ft, (uptr)mVU_absclip);
@ -611,7 +611,7 @@ mVUop(mVU_CLIP) {
OR32RtoR (gprT1, gprT2); OR32RtoR (gprT1, gprT2);
AND32ItoR(gprT1, 0xffffff); AND32ItoR(gprT1, 0xffffff);
mVUallocCFLAGb(mVU, gprT1, fcInstance); mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
} }
pass3 { mVUlog("CLIP"); mVUlogCLIP(); } pass3 { mVUlog("CLIP"); mVUlogCLIP(); }
} }