zeroRecs:

-minor change

microVU:
-fixed a lot of various errors
-partially implemented some clip flag stuff
-partially implemented some branch/jump stuff

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@981 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-04-15 10:13:48 +00:00
parent 5e87ea3127
commit af792b7694
11 changed files with 162 additions and 143 deletions

View File

@ -866,12 +866,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
break;
case 9: // XW
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset);
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xff); //WWWW
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP);

View File

@ -64,8 +64,8 @@ public:
blockList[listSize].x86ptrStart = x86ptrStart;
}
}*/
microBlock* search(u32 pipelineState, microRegInfo* pState) {
if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State)
microBlock* search(/*u32 pipelineState,*/ microRegInfo* pState) {
/*if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i < listSize; i++) {
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
}
@ -74,7 +74,7 @@ public:
for (int i = 0; i < listSize; i++) {
if (blockList[i].pipelineState == pipelineState) return &blockList[i];
}
}
}*/
return NULL;
}
void clearFast() {

View File

@ -35,6 +35,7 @@ struct microRegInfo {
u8 p;
u8 r;
u8 xgkick;
u8 needExactMatch; // This block needs an exact match of pipeline state
};
struct microTempRegInfo {

View File

@ -700,12 +700,12 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
microVUt(void) mVUallocCFLAGa(int reg, int fInstance) {
microVU* mVU = mVUx;
MOV32MtoR(reg, mVU->clipFlag[fInstance]);
MOV32MtoR(reg, (uptr)&mVU->clipFlag[fInstance]);
}
microVUt(void) mVUallocCFLAGb(int reg, int fInstance) {
microVU* mVU = mVUx;
MOV32RtoM(mVU->clipFlag[fInstance], reg);
MOV32RtoM((uptr)&mVU->clipFlag[fInstance], reg);
}
//------------------------------------------------------------------

View File

@ -247,8 +247,10 @@ microVUt(void) mVUanalyzeSflag(int It) {
if (!It) { mVUinfo |= _isNOP; }
else { // Sets _isSflag at instruction that FSxxx opcode reads its status flag from
mVUinfo |= _swapOps;
if (mVUcount >= 4) { incPC2(-8); mVUinfo |= _isSflag; incPC2(8); }
//else { incPC2((mVUcount*-2)); mVUinfo |= _isSflag; incPC2(mVUcount*-2); }
if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); }
// Note: _isSflag is used for status flag optimizations.
// Due to stalls, it can only be set one instruction prior to the status flag read instruction;
// if we were guaranteed no stalls were to happen, it could be set 4 instructions prior.
}
analyzeVIreg2(It, 1);
}

View File

@ -33,16 +33,22 @@
} \
}
#define branchCase(Xcmp) \
#define branchCase(JMPcc) \
CMP16ItoM((uptr)mVU->branch, 0); \
ajmp = Xcmp((uptr)0); \
ajmp = JMPcc((uptr)0); \
break
#define branchCase2() { \
incPC(-2); \
MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \
mVUallocVIb<vuIndex>(gprT1, _Ft_); \
incPC(+2); \
#define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \
yF += (mVUstall > 3) ? 3 : mVUstall; \
if (yF > zF) { \
pFlag += (yF-zF); \
if (pFlag >= xFlag) pFlag = (xFlag-1); \
zF++; \
xF = (yF-zF); \
zF = yF; \
yF -= xF; \
} \
yF++; \
}
#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
@ -85,61 +91,45 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
// Ensure last ~4+ instructions update mac flags
int endPC = iPC;
int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances
for (int i = mVUcount, int iX = 0; i > 0; i--, aCount++) {
u32 aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances
for (int i = mVUcount, iX = 0; i > 0; i--, aCount++) {
if (doStatus) { mVUinfo |= _doMac; iX++; if ((iX >= 4) || (aCount > 4)) { break; } }
incPC2(-2);
}
// Status/Mac Flags Setup Code
int xStatus = 8; // Status Instance starts at #0 on every block ((8&3) == 0)
int xMac = 8; // Mac Instance starts at #0 on every block ((8&3) == 0)
int pStatus = 3;
int pMac = 3;
int yStatus = 0;
int xStatus = 8, xMac = 8, xClip = 8; // Flag Instances start at #0 on every block ((8&3) == 0)
int pStatus = 3, pMac = 3, pClip = 3;
int xS = 0, yS = 1, zS = 0;
int xM = 0, yM = 1, zM = 0;
int xCount = mVUcount; // Backup count
int xC = 0, yC = 1, zC = 0;
u32 xCount = mVUcount; // Backup count
iPC = mVUstartPC;
for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
if (((xCount - mVUcount) > aCount) && isFSSET) mVUstatusFlagOp<vuIndex>(); // Don't Optimize out on the last ~4+ instructions
yS += (mVUstall > 3) ? 3 : mVUstall;
if (yS > zS) {
pStatus += (yS-zS);
if (pStatus >= xStatus) pStatus = (xStatus-1);
zS++;
xS = (yS-zS);
zS = yS;
yS -= xS;
}
yS++;
yM += (mVUstall > 3) ? 3 : mVUstall;
if (yM > zM) {
pMac += (yM-zM);
if (pMac >= xMac) pMac = (xMac-1);
zM++;
xM = (yM-zM);
zM = yM;
yM -= xM;
}
yM++;
flagSetMacro(xStatus, pStatus, xS, yS, zS); // Handles _fvsinstances
flagSetMacro(xMac, pMac, xM, yM, zM); // Handles _fvminstances
flagSetMacro(xClip, pClip, xC, yC, zC); // Handles _fvcinstances
mVUinfo |= (xStatus&3) << 12; // _fsInstance
mVUinfo |= (xMac&3) << 10; // _fmInstance
mVUinfo |= (xClip&3) << 14; // _fcInstance
mVUinfo |= (pStatus&3) << 18; // _fvsInstance
mVUinfo |= (pMac&3) << 16; // _fvmInstance
mVUinfo |= (pClip&3) << 20; // _fvcInstance
if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); }
if (doMac) { xMac = (xMac+1); }
if (doClip) { xClip = (xClip+1); }
incPC2(2);
}
mVUcount = xCount; // Restore count
// Setup Last 4 instances of Status/Mac flags (needed for accurate block linking)
iPC = endPC;
for (int i = 3, int j = 3, int ii = 1, int jj = 1; aCount > 0; ii++, jj++, aCount--) {
for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) {
if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) {
for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; }
}
@ -156,8 +146,8 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
// Recompiles Code for Proper Flags on Block Linkings
microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) {
PUSHR(gprR); // Backup gprR
PUSHR(gprESP); // Backup gprESP
PUSH32R(gprR); // Backup gprR
PUSH32R(gprESP); // Backup gprESP
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
@ -184,8 +174,8 @@ microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) {
OR32RtoR(gprF2, getFlagReg2(bMac[2]));
OR32RtoR(gprF3, getFlagReg2(bMac[3]));
POPR(gprESP); // Restore gprESP
POPR(gprR); // Restore gprR
POP32R(gprESP); // Restore gprESP
POP32R(gprR); // Restore gprR
}
microVUt(void) mVUincCycles(int x) {
@ -245,15 +235,14 @@ microVUt(void) mVUdivSet() {
// Recompiler
//------------------------------------------------------------------
microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
microVU* mVU = mVUx;
microBlock block;
u8* thisPtr = mVUcurProg.x86Ptr;
u8* thisPtr = mVUcurProg.x86ptr;
iPC = startPC / 4;
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pblock = mVUblock[iPC/2]->search(pipelineState, pState);
if (block) { return pblock->x86ptrStart; }
microBlock* pblock = mVUblock[iPC/2]->search((microRegInfo*)pState);
if (pblock) { return pblock->x86ptrStart; }
// First Pass
setCode();
@ -291,9 +280,7 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
setCode();
for (bool x = 1; x; ) {
if (isEOB) { x = 0; }
//if (isBranch2) { mVUopU<vuIndex, 1>(); incPC(2); }
if (isNop) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
if (isNOP) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else if (!swapOps) { doUpperOp(); incPC(1); mVUopL<vuIndex, 1>(); }
else { incPC(1); mVUopL<vuIndex, 1>(); incPC(-1); doUpperOp(); incPC(1); }
@ -307,17 +294,30 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
case 6: branchCase(JLE32); // IBLEQ
case 7: branchCase(JL32); // IBLTZ
case 8: branchCase(JNZ32); // IBNEQ
case 2: branchCase2(); // BAL
case 1:
case 1: case 2: // B/BAL
// ToDo: search for block
// (remember about global variables and recursion!)
mVUsetFlagsRec<vuIndex>(bStatus, bMac);
ajmp = JMP32((uptr)0);
break; // B/BAL
case 9: branchCase2(); // JALR
case 10: break; // JR/JALR
//mVUcurProg.x86Ptr
break;
case 9: case 10: // JR/JALR
mVUsetFlagsRec<vuIndex>(bStatus, bMac);
PUSH32R(gprR); // Backup EDX
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
AND32ItoR(gprT2, (vuIndex) ? 0x3ff8 : 0xff8);
MOV32ItoR(gprR, (u32)&pblock->pState); // Get pState (EDX second argument for __fastcall)
//ToDo: Add block to block manager and use its address instead of pblock!
if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
else CALLFunc((uptr)mVUcompileVU1);
POP32R(gprR); // Restore
JMPR(gprT1); // Jump to rec-code address
break;
}
//mVUcurProg.x86Ptr
return thisPtr;
}
}
@ -333,11 +333,14 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
//MOV32ItoM((uptr)&mVU->p, mVU->p);
//MOV32ItoM((uptr)&mVU->q, mVU->q);
AND32ItoM((uptr)&microVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&microVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
return thisPtr;
}
void* __fastcall mVUcompileVU0(u32 startPC, uptr pState) { return mVUcompile<0>(startPC, pState); }
void* __fastcall mVUcompileVU1(u32 startPC, uptr pState) { return mVUcompile<1>(startPC, pState); }
#endif //PCSX2_MICROVU

View File

@ -59,8 +59,8 @@ microVUt(void) mVUdispatcherA() {
}
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)&mVU_maxvals[0]);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)&mVU_minvals[0]);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ

View File

@ -1055,24 +1055,29 @@ microVUf(void) mVU_XGKICK() {
microVUf(void) mVU_B() {
microVU* mVU = mVUx;
mVUbranch = 1;
if (!recPass) { /*mVUinfo |= _isBranch2;*/ }
}
microVUf(void) mVU_BAL() {
microVU* mVU = mVUx;
mVUbranch = 2;
if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); }
else {}
if (!recPass) { analyzeVIreg2(_Ft_, 1); }
else {
MOV32ItoR(gprT1, bSaveAddr);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
// Note: Not sure if the lower instruction in the branch-delay slot
// should read the previous VI-value or the VI-value resulting from this branch.
// This code does the latter...
}
}
microVUf(void) mVU_IBEQ() {
microVU* mVU = mVUx;
mVUbranch = 3;
if (!recPass) { mVUanalyzeBranch2<vuIndex>(_Fs_, _Ft_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else { mVUallocVIa<vuIndex>(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBGEZ() {
@ -1080,10 +1085,9 @@ microVUf(void) mVU_IBGEZ() {
mVUbranch = 4;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
//SHR32ItoR(gprT1, 15);
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBGTZ() {
@ -1091,9 +1095,9 @@ microVUf(void) mVU_IBGTZ() {
mVUbranch = 5;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBLEZ() {
@ -1101,9 +1105,9 @@ microVUf(void) mVU_IBLEZ() {
mVUbranch = 6;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBLTZ() {
@ -1111,10 +1115,9 @@ microVUf(void) mVU_IBLTZ() {
mVUbranch = 7;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
//SHR32ItoR(gprT1, 15);
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBNE() {
@ -1122,22 +1125,37 @@ microVUf(void) mVU_IBNE() {
mVUbranch = 8;
if (!recPass) { mVUanalyzeBranch2<vuIndex>(_Fs_, _Ft_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else { mVUallocVIa<vuIndex>(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)mVU->branch, gprT1);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_JR() {
microVU* mVU = mVUx;
mVUbranch = 9;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
microVUf(void) mVU_JALR() {
microVU* mVU = mVUx;
mVUbranch = 10;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); analyzeVIreg2(_Ft_, 1); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)&mVU->branch, gprT1);
MOV32ItoR(gprT1, bSaveAddr);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
// Note: Not sure if the lower instruction in the branch-delay slot
// should read the previous VI-value or the VI-value resulting from this branch.
// This code does the latter...
}
}
#endif //PCSX2_MICROVU

View File

@ -144,7 +144,6 @@ declareAllVariables
#define mVUbranch mVUallocInfo.branch
#define mVUcycles mVUallocInfo.cycles
#define mVUcount mVUallocInfo.count
//#define mVUstall mVUallocInfo.maxStall
#define mVUregs mVUallocInfo.regs
#define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC
@ -157,6 +156,7 @@ declareAllVariables
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
#define incCycles(x) { mVUincCycles<vuIndex>(x); }
#define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
#define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch
@ -170,14 +170,14 @@ declareAllVariables
#define _doFlags (3<<8)
#define _doMac (1<<8)
#define _doStatus (1<<9)
#define _fmInstance (3<<10)
#define _fsInstance (3<<12)
#define _fpsInstance (3<<12)
#define _fcInstance (3<<14)
#define _fpcInstance (3<<14)
#define _fvmInstance (3<<16)
#define _fvsInstance (3<<18)
#define _fvcInstance (3<<20)
#define _fmInstance (3<<10) // Mac Write Instance
#define _fsInstance (3<<12) // Status Write Instance
#define _fcInstance (3<<14) // Clip Write Instance
#define _fpsInstance (3<<12) // Prev.S. Write Instance
#define _fpcInstance (3<<14) // Prev.C. Write Instance
#define _fvmInstance (3<<16) // Mac Read Instance (at T-stage for lower instruction)
#define _fvsInstance (3<<18) // Status Read Instance (at T-stage for lower instruction)
#define _fvcInstance (3<<20) // Clip Read Instance (at T-stage for lower instruction)
#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches)
@ -186,8 +186,7 @@ declareAllVariables
#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction
#define _isFSSET (1<<27) // Cur Instruction is FSSET
#define _doDivFlag (1<<28) // Transfer Div flag to Status Flag
//#define _isBranch2 (1<<31) // Cur Instruction is a Branch that writes VI regs (BAL/JALR)
#define _doClip (1<<29)
#define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1))
@ -217,7 +216,7 @@ declareAllVariables
#define swapOps (mVUinfo & (1<<26))
#define isFSSET (mVUinfo & (1<<27))
#define doDivFlag (mVUinfo & (1<<28))
//#define isBranch2 (mVUinfo & (1<<31))
#define doClip (mVUinfo & (1<<29))
#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9)
#define mmVI(_VIreg_) (_VIreg_ - 1)

View File

@ -93,40 +93,39 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
}
}
// Modifies the Source Reg!
microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
switch ( xyzw ) {
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
SSE_MOVSS_XMM_to_M32(offset+4, reg);
SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // YW
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1);
case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
SSE_MOVLPS_XMM_to_M64(offset+4, reg);
break; // YZ
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1);
SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_M64(offset+4, reg);
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // YZW
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_M32(offset, reg);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
case 9: SSE_MOVSS_XMM_to_M32(offset, reg);
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // XW
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_M32(offset, reg);
SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
case 10: SSE_MOVSS_XMM_to_M32(offset, reg);
SSE_MOVHLPS_XMM_to_XMM(reg, reg);
SSE_MOVSS_XMM_to_M32(offset+8, reg);
break; //XZ
case 11: SSE_MOVSS_XMM_to_M32(offset, reg);
SSE_MOVHPS_XMM_to_M64(offset+8, reg);
break; //XZW
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_M64(offset, xmmT1);
SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_M64(offset, reg);
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // XYW
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVLPS_XMM_to_M64(offset, reg);
SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
case 14: SSE_MOVLPS_XMM_to_M64(offset, reg);
SSE_MOVHLPS_XMM_to_XMM(reg, reg);
SSE_MOVSS_XMM_to_M32(offset+8, reg);
break; // XYZ
case 8: SSE_MOVSS_XMM_to_M32(offset, reg); break; // X
case 4: SSE_MOVSS_XMM_to_M32(offset+4, reg); break; // Y
@ -138,39 +137,38 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
}
}
// Modifies the Source Reg!
microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
switch ( xyzw ) {
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // YW
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset+4);
break; // YZ
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // YZW
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
case 9: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // XW
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
case 10: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHLPS_XMM_to_XMM(reg, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZ
case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZW
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // XYW
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
case 14: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHLPS_XMM_to_XMM(reg, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; // XYZ
case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X

View File

@ -31,7 +31,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
microVU* mVU = mVUx;
int sReg, mReg = gprT1;
static u8 *pjmp, *pjmp2;
static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
if (!doFlags) return;
if (!doMac) { regT1 = reg; }