From 0587cdc21085098dcfd97d80825249529dc19a4f Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 19 Aug 2009 07:48:20 +0000 Subject: [PATCH] microVU: 70% implemented Branch in Branch Delay Slots. Hopefully I'll finish tomorrow. Compare Star Ocean 3 intro with mVU vs sVU, and you can see mVU now correctly renders the title menu/new game startup screen =) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1652 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.h | 1 + pcsx2/x86/microVU_Analyze.inl | 19 +++++----- pcsx2/x86/microVU_Branch.inl | 36 ++++++++++--------- pcsx2/x86/microVU_Compile.inl | 15 ++++---- pcsx2/x86/microVU_IR.h | 3 +- pcsx2/x86/microVU_Lower.inl | 68 +++++++++++++++++++++++++---------- pcsx2/x86/microVU_Misc.h | 1 + 7 files changed, 92 insertions(+), 51 deletions(-) diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index db7063f98e..3e9066e7ac 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -159,6 +159,7 @@ struct microVU { u32 VIbackup; // Holds a backup of a VI reg if modified before a branch u32 VIxgkick; // Holds a backup of a VI reg used for xgkick-delays u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) + u32 evilBranch; // For Branches in Branch Delay Slots, holds Address to Jump to u32 p; // Holds current P instance index u32 q; // Holds current Q instance index u32 totalCycles; // Total Cycles that mVU is expected to run for diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 5f3a826c73..7433bb2314 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -389,30 +389,31 @@ microVUt(void) analyzeBranchVI(mV, int xReg, bool &infoVar) { } // Branch in Branch Delay-Slots -microVUt(void) mVUbranchCheck(mV) { - if (!mVUcount) return; +microVUt(int) mVUbranchCheck(mV) { + if (!mVUcount) return 0; incPC(-2); if (mVUlow.branch) { incPC(2); - Console::Error("microVU%d Warning: Branch in Branch delay slot! [%04x]", params mVU->index, xPC); - mVUlow.isNOP = 1; + mVUlow.evilBranch = 1; + mVUregs.blockType = 2; + DevCon::Status("microVU%d Warning: Branch in Branch delay slot! [%04x]", params mVU->index, xPC); + return 1; } - else incPC(2); + incPC(2); + return 0; } microVUt(void) mVUanalyzeCondBranch1(mV, int Is) { - mVUbranchCheck(mVU); analyzeVIreg1(Is, mVUlow.VI_read[0]); - if (!mVUstall) { + if (!mVUstall && !mVUbranchCheck(mVU)) { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); } } microVUt(void) mVUanalyzeCondBranch2(mV, int Is, int It) { - mVUbranchCheck(mVU); analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeVIreg1(It, mVUlow.VI_read[1]); - if (!mVUstall) { + if (!mVUstall && !mVUbranchCheck(mVU)) { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); analyzeBranchVI(mVU, It, mVUlow.memReadIt); } diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index 27dc66bf37..efd66e0265 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -96,10 +96,25 @@ void normBranchCompile(microVU* mVU, u32 branchPC) { else { mVUcompile(mVU, branchPC, (uptr)&mVUregs); } } +void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) { + using namespace x86Emitter; + memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + mVUsetupBranch(mVU, mFC); + mVUbackupRegs(mVU); + + if (isEvilJump) MOV32MtoR(gprT2, (uptr)&mVU->evilBranch); + else MOV32MtoR(gprT2, (uptr)&mVU->branch); + MOV32ItoR(gprR, (u32)&mVUpBlock->pStateEnd); + + if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState) + else xCALL(mVUcompileJIT<1>); + + mVUrestoreRegs(mVU); + JMPR(gprT1); // Jump to rec-code address +} + void normBranch(mV, microFlagCycles& mFC) { - incPC(-3); // Go back to branch opcode (to get branch imm addr) - // E-bit Branch if (mVUup.eBit) { iPC = branchAddr/4; mVUendProgram(mVU, &mFC, 1); return; } @@ -112,6 +127,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { using namespace x86Emitter; mVUsetupBranch(mVU, mFC); xCMP(ptr16[&mVU->branch], 0); + incPC(3); if (mVUup.eBit) { // Conditional Branch With E-Bit Set mVUendProgram(mVU, &mFC, 2); xForwardJump8 eJMP((JccComparisonType)JMPcc); @@ -155,8 +171,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { void normJump(mV, microFlagCycles& mFC) { using namespace x86Emitter; - mVUprint("mVUcompile JR/JALR"); - incPC(-3); // Go back to jump opcode if (mVUlow.constJump.isValid) { // Jump Address is Constant if (mVUup.eBit) { // E-bit Jump @@ -175,18 +189,6 @@ void normJump(mV, microFlagCycles& mFC) { MOV32MtoR(gprT1, (uptr)&mVU->branch); MOV32RtoM((uptr)&mVU->regs->VI[REG_TPC].UL, gprT1); xJMP(mVU->exitFunct); - return; } - - memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); - mVUsetupBranch(mVU, mFC); - - mVUbackupRegs(mVU); - MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX 1st argument for __fastcall) - MOV32ItoR(gprR, (u32)&mVUpBlock->pStateEnd); // Get pState (EDX 2nd argument for __fastcall) - - if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState) - else xCALL(mVUcompileJIT<1>); - mVUrestoreRegs(mVU); - JMPR(gprT1); // Jump to rec-code address + else normJumpCompile(mVU, mFC, 0); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 6d8a63d375..c84abecf8f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -174,14 +174,15 @@ microVUt(void) branchWarning(mV) { } microVUt(void) eBitPass1(mV, int& branch) { - if (!mVUregs.blockType) { + if (mVUregs.blockType != 1) { branch = 1; mVUup.eBit = 1; } } -microVUt(void) eBitWarning(mV, u32 endCount) { - if (endCount == 1) Console::Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", params mVU->index, xPC); +microVUt(void) eBitWarning(mV) { + if (mVUpBlock->pState.blockType == 1) Console::Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", params mVU->index, xPC); + if (mVUpBlock->pState.blockType == 2) Console::Error("microVU%d Warning: Branch, Branch, Branch! [%04x]", params mVU->index, xPC); incPC(2); if (curI & _Ebit_) { DevCon::Status("microVU%d: E-bit in Branch delay slot! [%04x]", params mVU->index, xPC); @@ -361,7 +362,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { mVUinfo.writeP = !mVU->p; if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(mVU); break; } else if (branch == 1) { branch = 2; } - if (mVUbranch) { mVUsetFlagInfo(mVU); eBitWarning(mVU, endCount); branch = 3; mVUbranch = 0; } + if (mVUbranch) { mVUsetFlagInfo(mVU); eBitWarning(mVU); branch = 3; mVUbranch = 0; } incPC(1); } @@ -385,11 +386,13 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (!doRegAlloc) { mVU->regAlloc->flushAll(); } - if (!mVUinfo.isBdelay) { incPC(1); } + if (_isBlock2) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; } + else if (!mVUinfo.isBdelay) { incPC(1); } else { mVUsetupRange(mVU, xPC, 0); mVUdebugNOW(1); - switch (mVUbranch) { + incPC(-3); // Go back to branch opcode + switch (mVUlow.branch) { case 1: case 2: normBranch(mVU, mFC); return thisPtr; // B/BAL case 9: case 10: normJump (mVU, mFC); return thisPtr; // JR/JALR case 3: condBranch(mVU, mFC, Jcc_Equal); return thisPtr; // IBEQ diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 9ace1ed3f5..8ea463ef7c 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -44,7 +44,7 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares u8 VI[16]; regInfo VF[32]; u8 flags; // clip x2 :: status x2 - u8 blockType; // 0 = Normal; 1 = Compile one instruction with E-bit termination + u8 blockType; // 0 = Normal; 1 = Compile one instruction (E-bit End); 2 = Compile one instruction (Branch End) u8 padding[5]; // 160 bytes #if defined(_MSC_VER) }; @@ -107,6 +107,7 @@ struct microLowerOp { microVIreg VI_read[2]; // VI regs read by this instruction microConstInfo constJump; // Constant Reg Info for JR/JARL instructions u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR) + bool evilBranch;// This instruction is a Branch in a Branch Delay Slot bool isNOP; // This instruction is a NOP bool isFSSET; // This instruction is a FSSET bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 94e5d15ad9..9b6d3f36c9 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1164,9 +1164,21 @@ void setBranchA(mP, int x, int _x_) { pass4 { if (_Imm11_ == 1 && !_x_) { return; } mVUbranch = x; } } +void condEvilBranch(mV, int JMPcc) { + using namespace x86Emitter; + xCMP(ax, 0); + xMOV(ptr32[&mVU->evilBranch], branchAddr); + xForwardJump8 cJMP((JccComparisonType)JMPcc); + incPC(-2); // Branch Not Taken + xMOV(ptr32[&mVU->evilBranch], ((branchAddr+8) & (mVU->microMemSize-8))); + incPC(2); + cJMP.SetTarget(); +} + mVUop(mVU_B) { setBranchA(mX, 1, 0); pass1 { mVUanalyzeNormBranch(mVU, 0, 0); } + pass2 { if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } } pass3 { mVUlog("B [%04x]", branchAddr, branchAddr); } } @@ -1176,76 +1188,93 @@ mVUop(mVU_BAL) { pass2 { MOV32ItoR(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); + if (mVUlow.evilBranch) { MOV32ItoM((uptr)&mVU->evilBranch, branchAddr); } } pass3 { mVUlog("BAL vi%02d [%04x]", _Ft_, branchAddr, branchAddr); } } mVUop(mVU_IBEQ) { + using namespace x86Emitter; setBranchA(mX, 3, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); + if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup); else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } - MOV32RtoM((uptr)&mVU->branch, gprT1); + + if (!mVUlow.evilBranch) { MOV32RtoM((uptr)&mVU->branch, gprT1); } + else { condEvilBranch(mVU, Jcc_Equal); } } pass3 { mVUlog("IBEQ vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr, branchAddr); } } mVUop(mVU_IBGEZ) { + using namespace x86Emitter; setBranchA(mX, 4, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { - if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); - else mVUallocVIa(mVU, gprT1, _Is_); - MOV32RtoM((uptr)&mVU->branch, gprT1); + if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); + else mVUallocVIa(mVU, gprT1, _Is_); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_GreaterOrEqual); } pass3 { mVUlog("IBGEZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } } mVUop(mVU_IBGTZ) { + using namespace x86Emitter; setBranchA(mX, 5, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { - if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); - else mVUallocVIa(mVU, gprT1, _Is_); - MOV32RtoM((uptr)&mVU->branch, gprT1); + if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); + else mVUallocVIa(mVU, gprT1, _Is_); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_Greater); } pass3 { mVUlog("IBGTZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } } mVUop(mVU_IBLEZ) { + using namespace x86Emitter; setBranchA(mX, 6, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { - if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); - else mVUallocVIa(mVU, gprT1, _Is_); - MOV32RtoM((uptr)&mVU->branch, gprT1); + if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); + else mVUallocVIa(mVU, gprT1, _Is_); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_LessOrEqual); } pass3 { mVUlog("IBLEZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } } mVUop(mVU_IBLTZ) { + using namespace x86Emitter; setBranchA(mX, 7, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } - pass2 { - if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); - else mVUallocVIa(mVU, gprT1, _Is_); - MOV32RtoM((uptr)&mVU->branch, gprT1); + pass2 { + if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); + else mVUallocVIa(mVU, gprT1, _Is_); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else condEvilBranch(mVU, Jcc_Less); } pass3 { mVUlog("IBLTZ vi%02d [%04x]", _Fs_, branchAddr, branchAddr); } } mVUop(mVU_IBNE) { + using namespace x86Emitter; setBranchA(mX, 8, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } pass2 { if (mVUlow.memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup); else mVUallocVIa(mVU, gprT1, _Is_); + if (mVUlow.memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup); else { mVUallocVIa(mVU, gprT2, _It_); XOR32RtoR(gprT1, gprT2); } - MOV32RtoM((uptr)&mVU->branch, gprT1); + + if (!mVUlow.evilBranch) { MOV32RtoM((uptr)&mVU->branch, gprT1); } + else { condEvilBranch(mVU, Jcc_NotEqual); } } pass3 { mVUlog("IBNE vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr, branchAddr); } } @@ -1254,11 +1283,12 @@ mVUop(mVU_JR) { mVUbranch = 9; pass1 { mVUanalyzeJump(mVU, _Is_, 0, 0); } pass2 { - if (!mVUlow.constJump.isValid) { + if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { mVUallocVIa(mVU, gprT1, _Is_); SHL32ItoR(gprT1, 3); AND32ItoR(gprT1, mVU->microMemSize - 8); - MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else MOV32RtoM((uptr)&mVU->evilBranch, gprT1); } } pass3 { mVUlog("JR [vi%02d]", _Fs_); } @@ -1268,11 +1298,13 @@ mVUop(mVU_JALR) { mVUbranch = 10; pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); } pass2 { - if (!mVUlow.constJump.isValid) { + if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { mVUallocVIa(mVU, gprT1, _Is_); SHL32ItoR(gprT1, 3); AND32ItoR(gprT1, mVU->microMemSize - 8); MOV32RtoM((uptr)&mVU->branch, gprT1); + if (!mVUlow.evilBranch) MOV32RtoM((uptr)&mVU->branch, gprT1); + else MOV32RtoM((uptr)&mVU->evilBranch, gprT1); } MOV32ItoR(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 01985cc55f..3f5ec0c28a 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -206,6 +206,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); #define mFLAG mVUinfo.mFlag #define cFLAG mVUinfo.cFlag #define mVUrange mVUcurProg.ranges.range[mVUcurProg.ranges.total] +#define _isBlock2 (mVUpBlock->pState.blockType == 2) #define xPC ((iPC / 2) * 8) #define curI ((u32*)mVU->regs->Micro)[iPC] //mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; }