From 3c458affadc0545a1d4ac07787ee752d0f63371e Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 20 Aug 2009 00:49:13 +0000 Subject: [PATCH] microVU: Finished properly supporting branch in branch delay slots. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1654 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.h | 1 + pcsx2/x86/microVU_Analyze.inl | 1 + pcsx2/x86/microVU_Compile.inl | 2 +- pcsx2/x86/microVU_IR.h | 5 ++- pcsx2/x86/microVU_Lower.inl | 77 +++++++++++++++++++++-------------- pcsx2/x86/microVU_Misc.h | 6 ++- 6 files changed, 56 insertions(+), 36 deletions(-) diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 3e9066e7ac..49ae3feef6 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -159,6 +159,7 @@ struct microVU { u32 VIbackup; // Holds a backup of a VI reg if modified before a branch u32 VIxgkick; // Holds a backup of a VI reg used for xgkick-delays u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) + u32 badBranch; // For Branches in Branch Delay Slots, holds Address the first Branch went to + 8 u32 evilBranch; // For Branches in Branch Delay Slots, holds Address to Jump to u32 p; // Holds current P instance index u32 q; // Holds current Q instance index diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 7433bb2314..b415d738c0 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -393,6 +393,7 @@ microVUt(int) mVUbranchCheck(mV) { if (!mVUcount) return 0; incPC(-2); if (mVUlow.branch) { + mVUlow.badBranch = 1; incPC(2); mVUlow.evilBranch = 1; mVUregs.blockType = 2; diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index c84abecf8f..9113ffbf9d 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -386,7 +386,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (!doRegAlloc) { mVU->regAlloc->flushAll(); } - if (_isBlock2) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; } + if (isEvilBlock) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; } else if (!mVUinfo.isBdelay) { incPC(1); } else { mVUsetupRange(mVU, xPC, 0); diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 8ea463ef7c..366bd19d53 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -44,7 +44,7 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares u8 VI[16]; regInfo VF[32]; u8 flags; // clip x2 :: status x2 - u8 blockType; // 0 = Normal; 1 = Compile one instruction (E-bit End); 2 = Compile one instruction (Branch End) + u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) u8 padding[5]; // 160 bytes #if defined(_MSC_VER) }; @@ -107,7 +107,8 @@ struct microLowerOp { microVIreg VI_read[2]; // VI regs read by this instruction microConstInfo constJump; // Constant Reg Info for JR/JARL instructions u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR) - bool evilBranch;// This instruction is a Branch in a Branch Delay Slot + bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot + bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch) bool isNOP; // This instruction is a NOP bool isFSSET; // This instruction is a FSSET bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 9b6d3f36c9..03df227c96 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1166,19 +1166,32 @@ void setBranchA(mP, int x, int _x_) { void condEvilBranch(mV, int JMPcc) { using namespace x86Emitter; - xCMP(ax, 0); + if (mVUlow.badBranch) { + xMOV(ptr32[&mVU->branch], eax); + xMOV(ptr32[&mVU->badBranch], branchAddrN); + xCMP(ax, 0); + xForwardJump8 cJMP((JccComparisonType)JMPcc); + incPC(4); // Branch Not Taken + xMOV(ptr32[&mVU->badBranch], xPC); + incPC(-4); + cJMP.SetTarget(); + return; + } xMOV(ptr32[&mVU->evilBranch], branchAddr); + xCMP(ax, 0); xForwardJump8 cJMP((JccComparisonType)JMPcc); - incPC(-2); // Branch Not Taken - xMOV(ptr32[&mVU->evilBranch], ((branchAddr+8) & (mVU->microMemSize-8))); - incPC(2); + xMOV(eax, ptr32[&mVU->badBranch]); // Branch Not Taken + xMOV(ptr32[&mVU->evilBranch], eax); cJMP.SetTarget(); } mVUop(mVU_B) { setBranchA(mX, 1, 0); pass1 { mVUanalyzeNormBranch(mVU, 0, 0); } - pass2 { if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } } + pass2 { + if (mVUlow.badBranch) { MOV32ItoM((uptr)&mVU->badBranch, branchAddrN); } + if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } + } pass3 { mVUlog("B [%04x]", branchAddr, branchAddr); } } @@ -1188,7 +1201,8 @@ mVUop(mVU_BAL) { pass2 { MOV32ItoR(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); - if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } + if (mVUlow.badBranch) { MOV32ItoM((uptr)&mVU->badBranch, branchAddrN); } + if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } } pass3 { mVUlog("BAL vi%02d [%04x]", _Ft_, branchAddr, branchAddr); } } @@ -1204,8 +1218,8 @@ mVUop(mVU_IBEQ) { if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup); else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } - if (!mVUlow.evilBranch) { MOV32RtoM((uptr)&mVU->branch, gprT1); } - else { condEvilBranch(mVU, Jcc_Equal); } + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_Equal); } pass3 { mVUlog("IBEQ vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr, branchAddr); } } @@ -1217,7 +1231,7 @@ mVUop(mVU_IBGEZ) { pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); else condEvilBranch(mVU, Jcc_GreaterOrEqual); } pass3 { mVUlog("IBGEZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } @@ -1230,7 +1244,7 @@ mVUop(mVU_IBGTZ) { pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); else condEvilBranch(mVU, Jcc_Greater); } pass3 { mVUlog("IBGTZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } @@ -1243,7 +1257,7 @@ mVUop(mVU_IBLEZ) { pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); else condEvilBranch(mVU, Jcc_LessOrEqual); } pass3 { mVUlog("IBLEZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } @@ -1256,7 +1270,7 @@ mVUop(mVU_IBLTZ) { pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); else condEvilBranch(mVU, Jcc_Less); } pass3 { mVUlog("IBLTZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } @@ -1273,24 +1287,32 @@ mVUop(mVU_IBNE) { if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup); else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } - if (!mVUlow.evilBranch) { MOV32RtoM((uptr)&mVU->branch, gprT1); } - else { condEvilBranch(mVU, Jcc_NotEqual); } + if (!(isBadOrEvil)) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_NotEqual); } pass3 { mVUlog("IBNE vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr, branchAddr); } } +void normJumpPass2(mV) { + if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { + mVUallocVIa(mVU, gprT1, _Is_); + SHL32ItoR(gprT1, 3); + AND32ItoR(gprT1, mVU->microMemSize - 8); + MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else MOV32RtoM((uptr)&mVU->evilBranch, gprT1); + if (mVUlow.badBranch) { + ADD32ItoR(gprT1, 8); + AND32ItoR(gprT1, mVU->microMemSize - 8); + MOV32RtoM((uptr)&mVU->badBranch, gprT1); + } + } +} + mVUop(mVU_JR) { mVUbranch = 9; pass1 { mVUanalyzeJump(mVU, _Is_, 0, 0); } - pass2 { - if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { - mVUallocVIa(mVU, gprT1, _Is_); - SHL32ItoR(gprT1, 3); - AND32ItoR(gprT1, mVU->microMemSize - 8); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); - else MOV32RtoM((uptr)&mVU->evilBranch, gprT1); - } - } + pass2 { normJumpPass2(mVU); } pass3 { mVUlog("JR [vi%02d]", _Fs_); } } @@ -1298,14 +1320,7 @@ mVUop(mVU_JALR) { mVUbranch = 10; pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); } pass2 { - if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { - mVUallocVIa(mVU, gprT1, _Is_); - SHL32ItoR(gprT1, 3); - AND32ItoR(gprT1, mVU->microMemSize - 8); - MOV32RtoM((uptr)&mVU->branch, gprT1); - if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); - else MOV32RtoM((uptr)&mVU->evilBranch, gprT1); - } + normJumpPass2(mVU); MOV32ItoR(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 3f5ec0c28a..943954a4ed 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -206,14 +206,16 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); #define mFLAG mVUinfo.mFlag #define cFLAG mVUinfo.cFlag #define mVUrange mVUcurProg.ranges.range[mVUcurProg.ranges.total] -#define _isBlock2 (mVUpBlock->pState.blockType == 2) +#define isEvilBlock (mVUpBlock->pState.blockType == 2) +#define isBadOrEvil (mVUlow.badBranch || mVUlow.evilBranch) #define xPC ((iPC / 2) * 8) #define curI ((u32*)mVU->regs->Micro)[iPC] //mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; } #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define bSaveAddr (((xPC + 16) & (mVU->microMemSize-8)) / 8) -#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & (mVU->microMemSize-8)) +#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & (mVU->microMemSize-8)) +#define branchAddrN ((xPC + 16 + (_Imm11_ * 8)) & (mVU->microMemSize-8)) #define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04)) #define cmpOffset(x) ((u8*)&(((u8*)x)[mVUprogI.ranges.range[i][0]])) #define Rmem (uptr)&mVU->regs->VI[REG_R].UL