From 18a12b10b021750d14a52e8f844b9625130a374f Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 13 Aug 2009 05:04:04 +0000 Subject: [PATCH] microVU: - Cleaned up microVU_Compile - Added file microVU_Branch git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1621 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/microVU.h | 1 + pcsx2/x86/microVU_Branch.inl | 201 +++++++++++ pcsx2/x86/microVU_Compile.inl | 385 ++++++--------------- pcsx2/x86/microVU_Flags.inl | 51 ++- pcsx2/x86/microVU_IR.h | 7 + 6 files changed, 338 insertions(+), 311 deletions(-) create mode 100644 pcsx2/x86/microVU_Branch.inl diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 6594926e75..a300c0fd7d 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2076,6 +2076,10 @@ RelativePath="..\..\x86\microVU_Analyze.inl" > + + diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index be067f2800..e1e1b5cbfd 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -206,5 +206,6 @@ typedef void (__fastcall *mVUrecCall)(u32, u32); #include "microVU_Lower.inl" #include "microVU_Tables.inl" #include "microVU_Flags.inl" +#include "microVU_Branch.inl" #include "microVU_Compile.inl" #include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl new file mode 100644 index 0000000000..74de944483 --- /dev/null +++ b/pcsx2/x86/microVU_Branch.inl @@ -0,0 +1,201 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +microVUt(void) mVUincCycles(mV, int x); +microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState); + +#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); } +#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3)) +#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3)) + +microVUt(void) mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { + + int fStatus = (isEbit) ? findFlagInst(mFC->xStatus, 0x7fffffff) : sI; + int fMac = (isEbit) ? findFlagInst(mFC->xMac, 0x7fffffff) : 0; + int fClip = (isEbit) ? findFlagInst(mFC->xClip, 0x7fffffff) : cI; + int qInst = 0; + int pInst = 0; + mVU->regAlloc->flushAll(); + + if (isEbit) { + mVUprint("mVUcompile ebit"); + memset(&mVUinfo, 0, sizeof(mVUinfo)); + memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); + mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0) + mVUcycles -= 100; + qInst = mVU->q; + pInst = mVU->p; + if (mVUinfo.doDivFlag) { + sFLAG.doFlag = 1; + sFLAG.write = fStatus; + mVUdivSet(mVU); + } + if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } + } + + // Save P/Q Regs + if (qInst) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); + if (isVU1) { + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, pInst ? 3 : 2); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); + } + + // Save Flag Instances + mVUallocSFLAGc(gprT1, gprT2, fStatus); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); + mVUallocMFLAGa(mVU, gprT1, fMac); + mVUallocCFLAGa(mVU, gprT2, fClip); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2); + + if (isEbit || isVU1) { // Clear 'is busy' Flags + AND32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif + } + + if (isEbit != 2) { // Save PC, and Jump to Exit Point + MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); + JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); + } +} + +// Recompiles Code for Proper Flags and Q/P regs on Block Linkings +microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) { + mVUprint("mVUsetupBranch"); + + // Flush Allocated Regs + mVU->regAlloc->flushAll(); + + // Shuffle Flag Instances + mVUsetupFlags(mVU, mFC); + + // Shuffle P/Q regs since every block starts at instance #0 + if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); } +} + +void condBranch(mV, microFlagCycles& mFC, microBlock* &pBlock, int JMPcc) { + using namespace x86Emitter; + mVUsetupBranch(mVU, mFC); + xCMP(ptr16[&mVU->branch], 0); + if (mVUup.eBit) { // Conditional Branch With E-Bit Set + mVUendProgram(mVU, &mFC, 2); + xForwardJump8 eJMP((JccComparisonType)JMPcc); + incPC(1); // Set PC to First instruction of Non-Taken Side + xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC); + xJMP(mVU->exitFunct); + eJMP.SetTarget(); + incPC(-4); // Go Back to Branch Opcode to get branchAddr + iPC = branchAddr/4; + xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC); + xJMP(mVU->exitFunct); + return; + } + else { // Normal Conditional Branch + microBlock* bBlock; + incPC2(1); // Check if Branch Non-Taken Side has already been recompiled + blockCreate(iPC/2); + bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); + incPC2(-1); + if (bBlock) { // Branch non-taken has already been compiled + xJcc( xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart ); + + // Check if branch-block has already been compiled + incPC(-3); // Go back to branch opcode (to get branch imm addr) + blockCreate(branchAddr/8); + pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { xJMP( pBlock->x86ptrStart ); } + else { mVUblockFetch(mVU, branchAddr, (uptr)&mVUregs); } + } + else { + s32* ajmp = xJcc32((JccComparisonType)JMPcc); + uptr jumpAddr; + u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs so back them up + memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + + incPC2(1); // Get PC for branch not-taken + mVUcompile(mVU, xPC, (uptr)&mVUregs); + + iPC = bPC; + incPC(-3); // Go back to branch opcode (to get branch imm addr) + jumpAddr = (uptr)mVUblockFetch(mVU, branchAddr, (uptr)&pBlock->pStateEnd); + *ajmp = (jumpAddr - ((uptr)ajmp + 4)); + } + } +} + +void normBranch(mV, microFlagCycles& mFC) { + using namespace x86Emitter; + microBlock* pBlock; + incPC(-3); // Go back to branch opcode (to get branch imm addr) + + // E-bit Branch + if (mVUup.eBit) { iPC = branchAddr/4; mVUendProgram(mVU, &mFC, 1); return; } + mVUsetupBranch(mVU, mFC); + + // Check if branch-block has already been compiled + blockCreate(branchAddr/8); + pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { xJMP(pBlock->x86ptrStart); } + else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); } +} + +void normJump(mV, microFlagCycles& mFC, microBlock* &pBlock) { + using namespace x86Emitter; + mVUprint("mVUcompile JR/JALR"); + incPC(-3); // Go back to jump opcode + + if (mVUlow.constJump.isValid) { + if (mVUup.eBit) { // E-bit Jump + iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1); + mVUendProgram(mVU, &mFC, 1); + } + else { + int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8); + mVUsetupBranch(mVU, mFC); + // Check if jump-to-block has already been compiled + blockCreate(jumpAddr/8); + pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { xJMP(pBlock->x86ptrStart); } + else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); } + } + return; + } + + if (mVUup.eBit) { // E-bit Jump + mVUendProgram(mVU, &mFC, 2); + MOV32MtoR(gprT1, (uptr)&mVU->branch); + MOV32RtoM((uptr)&mVU->regs->VI[REG_TPC].UL, gprT1); + xJMP(mVU->exitFunct); + return; + } + + memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + mVUsetupBranch(mVU, mFC); + + mVUbackupRegs(mVU); + MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) + MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) + + if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState) + else xCALL(mVUcompileJIT<1>); + mVUrestoreRegs(mVU); + JMPR(gprT1); // Jump to rec-code address +} diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index d19c546733..3013f1e60c 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -22,23 +22,6 @@ // Helper Macros //------------------------------------------------------------------ -#define branchCase(JMPcond) branchCaseFunct(mVU, bBlock, xStatus, xMac, xClip, xCycles, ajmp, JMPcond); break - -#define branchWarning() { \ - if (mVUbranch) { \ - Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params mVU->index, xPC); \ - mVUlow.isNOP = 1; \ - } \ -} - -#define startLoop() { \ - if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \ - if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \ - if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \ - memset(&mVUinfo, 0, sizeof(mVUinfo)); \ - memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \ -} - #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } #define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; } #define tCycles(dest, src) { dest = aMax(dest, src); } @@ -46,52 +29,11 @@ #define incQ() { mVU->q = (mVU->q+1) & 1; } #define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } -#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); } //------------------------------------------------------------------ // Helper Functions //------------------------------------------------------------------ -microVUt(void) doSwapOp(mV) { - if (mVUinfo.backupVF && !mVUlow.noWriteVF) { - DevCon::Status("microVU%d: Backing Up VF Reg [%04x]", params getIndex, xPC); - int t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg); - int t2 = mVU->regAlloc->allocReg(); - SSE_MOVAPS_XMM_to_XMM(t2, t1); - mVU->regAlloc->clearNeeded(t1); - mVUopL(mVU, 1); - t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0); - SSE_XORPS_XMM_to_XMM(t2, t1); - SSE_XORPS_XMM_to_XMM(t1, t2); - SSE_XORPS_XMM_to_XMM(t2, t1); - mVU->regAlloc->clearNeeded(t1); - incPC(1); - doUpperOp(); - t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf); - SSE_MOVAPS_XMM_to_XMM(t1, t2); - mVU->regAlloc->clearNeeded(t1); - mVU->regAlloc->clearNeeded(t2); - } - else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } -} - -microVUt(void) doIbit(microVU* mVU) { - if (mVUup.iBit) { - incPC(-1); - u32 tempI; - mVU->regAlloc->clearRegVF(33); - - if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { - Console::Status("microVU%d: Clamping I Reg", params mVU->index); - tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg - } - else tempI = curI; - - MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); - incPC(1); - } -} - // Used by mVUsetupRange microVUt(void) mVUcheckIsSame(mV) { @@ -167,6 +109,61 @@ microVUt(void) mVUsetupRange(mV, s32 pc, bool isStartPC) { } } +microVUt(void) startLoop(mV) { + if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } + if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } + if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } + memset(&mVUinfo, 0, sizeof(mVUinfo)); + memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); +} + +microVUt(void) doIbit(mV) { + if (mVUup.iBit) { + incPC(-1); + u32 tempI; + mVU->regAlloc->clearRegVF(33); + + if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { + Console::Status("microVU%d: Clamping I Reg", params mVU->index); + tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg + } + else tempI = curI; + + MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); + incPC(1); + } +} + +microVUt(void) doSwapOp(mV) { + if (mVUinfo.backupVF && !mVUlow.noWriteVF) { + DevCon::Status("microVU%d: Backing Up VF Reg [%04x]", params getIndex, xPC); + int t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg); + int t2 = mVU->regAlloc->allocReg(); + SSE_MOVAPS_XMM_to_XMM(t2, t1); + mVU->regAlloc->clearNeeded(t1); + mVUopL(mVU, 1); + t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0); + SSE_XORPS_XMM_to_XMM(t2, t1); + SSE_XORPS_XMM_to_XMM(t1, t2); + SSE_XORPS_XMM_to_XMM(t2, t1); + mVU->regAlloc->clearNeeded(t1); + incPC(1); + doUpperOp(); + t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf); + SSE_MOVAPS_XMM_to_XMM(t1, t2); + mVU->regAlloc->clearNeeded(t1); + mVU->regAlloc->clearNeeded(t2); + } + else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } +} + +microVUt(void) branchWarning(mV) { + if (mVUbranch) { + Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params mVU->index, xPC); + mVUlow.isNOP = 1; + } +} + // Optimizes the End Pipeline State Removing Unnecessary Info microVUt(void) mVUoptimizePipeState(mV) { for (int i = 0; i < 32; i++) { @@ -183,20 +180,6 @@ microVUt(void) mVUoptimizePipeState(mV) { mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info } -// Recompiles Code for Proper Flags and Q/P regs on Block Linkings -microVUt(void) mVUsetupBranch(mV, int* xStatus, int* xMac, int* xClip, int xCycles) { - mVUprint("mVUsetupBranch"); - - // Flush Allocated Regs - mVU->regAlloc->flushAll(); - - // Shuffle Flag Instances - mVUsetupFlags(mVU, xStatus, xMac, xClip, xCycles); - - // Shuffle P/Q regs since every block starts at instance #0 - if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); } -} - microVUt(void) mVUincCycles(mV, int x) { mVUcycles += x; for (int z = 31; z > 0; z--) { @@ -269,87 +252,6 @@ microVUt(void) mVUsetCycles(mV) { tCycles(mVUregs.xgkick, mVUregsTemp.xgkick); } -#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3)) -#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3)) - -microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip) { - - int fStatus = (isEbit) ? findFlagInst(xStatus, 0x7fffffff) : sI; - int fMac = (isEbit) ? findFlagInst(xMac, 0x7fffffff) : 0; - int fClip = (isEbit) ? findFlagInst(xClip, 0x7fffffff) : cI; - int qInst = 0; - int pInst = 0; - mVU->regAlloc->flushAll(); - - if (isEbit) { - mVUprint("mVUcompile ebit"); - memset(&mVUinfo, 0, sizeof(mVUinfo)); - memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); - mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0) - mVUcycles -= 100; - qInst = mVU->q; - pInst = mVU->p; - if (mVUinfo.doDivFlag) { - sFLAG.doFlag = 1; - sFLAG.write = fStatus; - mVUdivSet(mVU); - } - if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } - } - - // Save P/Q Regs - if (qInst) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); - if (isVU1) { - SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, pInst ? 3 : 2); - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); - } - - // Save Flag Instances - mVUallocSFLAGc(gprT1, gprT2, fStatus); - MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); - mVUallocMFLAGa(mVU, gprT1, fMac); - mVUallocCFLAGa(mVU, gprT2, fClip); - MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1); - MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2); - - if (isEbit || isVU1) { // Clear 'is busy' Flags - AND32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag - AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif - } - - if (isEbit != 2) { // Save PC, and Jump to Exit Point - MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); - JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); - } -} - -void branchCaseFunct(mV, microBlock* &bBlock, int* xStatus, int* xMac, int* xClip, int &xCycles, s32* &ajmp, int JMPcc) { - using namespace x86Emitter; - mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles); - xCMP(ptr16[&mVU->branch], 0); - if (mVUup.eBit) { // Conditional Branch With E-Bit Set - mVUendProgram(mVU, 2, xStatus, xMac, xClip); - xForwardJump8 eJMP((JccComparisonType)JMPcc); - incPC(1); // Set PC to First instruction of Non-Taken Side - xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC); - xJMP(mVU->exitFunct); - eJMP.SetTarget(); - incPC(-4); // Go Back to Branch Opcode to get branchAddr - iPC = branchAddr/4; - xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC); - xJMP(mVU->exitFunct); - } - else { // Normal Conditional Branch - incPC2(1); // Check if Branch Non-Taken Side has already been recompiled - blockCreate(iPC/2); - bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); - incPC2(-1); - if (bBlock) { xJcc( xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart ); } - else { ajmp = xJcc32((JccComparisonType)JMPcc); } - } -} - void __fastcall mVUwarning0(u32 PC) { Console::Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x]", params PC); } void __fastcall mVUwarning1(u32 PC) { Console::Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x]", params PC); } void __fastcall mVUprintPC1(u32 PC) { Console::Write("Block PC [%04x] ", params PC); } @@ -365,12 +267,13 @@ microVUt(void) mVUtestCycles(mV) { if (isVU1) CALLFunc((uptr)mVUwarning1); //else CALLFunc((uptr)mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation MOV32ItoR(gprR, Roffset); // Restore gprR - mVUendProgram(mVU, 0, NULL, NULL, NULL); + mVUendProgram(mVU, NULL, 0); x86SetJ32(jmp32); } } -microVUt(void) mVUinitConstValues(mV) { +// Initialize VI Constants (vi15 propagates through blocks) +microVUt(void) mVUinitConstValues(microVU* mVU) { for (int i = 0; i < 16; i++) { mVUconstReg[i].isValid = 0; mVUconstReg[i].regValue = 0; @@ -379,6 +282,24 @@ microVUt(void) mVUinitConstValues(mV) { mVUconstReg[15].regValue = mVUconstReg[15].isValid ? (mVUregs.vi15&0xffff) : 0; } +// Initialize Variables +microVUt(void) mVUinitFirstPass(microVU* mVU, microBlock* &pBlock, uptr pState, u8* thisPtr) { + mVUstartPC = iPC; // Block Start PC + mVUbranch = 0; // Branch Type + mVUcount = 0; // Number of instructions ran + mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage + mVU->p = 0; // All blocks start at p index #0 + mVU->q = 0; // All blocks start at q index #0 + memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info + mVUblock.x86ptrStart = thisPtr; + pBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager + mVUpBlock = pBlock; + mVUregs.flags = 0; + mVUflagInfo = 0; + mVUsFlagHack = CHECK_VU_FLAGHACK; + mVUinitConstValues(mVU); +} + //------------------------------------------------------------------ // Recompiler //------------------------------------------------------------------ @@ -386,38 +307,20 @@ microVUt(void) mVUinitConstValues(mV) { microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { using namespace x86Emitter; - microBlock* pBlock = NULL; - u8* thisPtr = x86Ptr; - const u32 endCount = (mVU->microMemSize / 8) - 1; - - // Setup Program Bounds/Range - mVUsetupRange(mVU, startPC, 1); - - // Reset regAlloc - mVU->regAlloc->reset(); + microFlagCycles mFC; + microBlock* pBlock = NULL; + u8* thisPtr = x86Ptr; + const u32 endCount = (mVU->microMemSize / 8) - 1; // First Pass iPC = startPC / 4; - setCode(); - mVUbranch = 0; - mVUstartPC = iPC; - mVUcount = 0; - mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage - mVU->p = 0; // All blocks start at p index #0 - mVU->q = 0; // All blocks start at q index #0 - memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info - mVUblock.x86ptrStart = thisPtr; - pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager - mVUpBlock = pBlock; - mVUregs.flags = 0; - mVUflagInfo = 0; - mVUsFlagHack = CHECK_VU_FLAGHACK; - - mVUinitConstValues(mVU); - + mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range + mVU->regAlloc->reset(); // Reset regAlloc + mVUinitFirstPass(mVU, pBlock, pState, thisPtr); + for (int branch = 0; mVUcount < endCount; mVUcount++) { incPC(1); - startLoop(); + startLoop(mVU); mVUincCycles(mVU, 1); mVUopU(mVU, 0); if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; } @@ -430,19 +333,16 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { mVUinfo.writeQ = !mVU->q; mVUinfo.readP = mVU->p; mVUinfo.writeP = !mVU->p; - if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(); break; } + if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(mVU); break; } else if (branch == 1) { branch = 2; } if (mVUbranch) { mVUsetFlagInfo(mVU); branch = 3; mVUbranch = 0; } incPC(1); } - // Sets Up Flag instances - int xStatus[4], xMac[4], xClip[4]; - int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip); - // Fix up vi15 const info for propagation through blocks mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0; - + + mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary @@ -450,8 +350,8 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { iPC = mVUstartPC; setCode(); mVUbranch = 0; - uint x; - for (x = 0; x < endCount; x++) { + u32 x = 0; + for (; x < endCount; x++) { if (mVUinfo.isEOB) { x = 0xffff; } if (mVUup.mBit) { OR32ItoM((uptr)&mVU->regs->flags, VUFLAG_MFLAGSET); } if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); } @@ -459,111 +359,27 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (!doRegAlloc) { mVU->regAlloc->flushAll(); } - - if (!mVUinfo.isBdelay) { incPC(1); } + if (!mVUinfo.isBdelay) { incPC(1); } else { - microBlock* bBlock = NULL; - s32* ajmp = 0; mVUsetupRange(mVU, xPC, 0); mVUdebugNOW(1); - switch (mVUbranch) { - case 3: branchCase(Jcc_Equal); // IBEQ - case 4: branchCase(Jcc_GreaterOrEqual); // IBGEZ - case 5: branchCase(Jcc_Greater); // IBGTZ - case 6: branchCase(Jcc_LessOrEqual); // IBLEQ - case 7: branchCase(Jcc_Less); // IBLTZ - case 8: branchCase(Jcc_NotEqual); // IBNEQ - case 1: case 2: // B/BAL - - mVUprint("mVUcompile B/BAL"); - incPC(-3); // Go back to branch opcode (to get branch imm addr) - - if (mVUup.eBit) { iPC = branchAddr/4; mVUendProgram(mVU, 1, xStatus, xMac, xClip); } // E-bit Branch - mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles); - - // Check if branch-block has already been compiled - blockCreate(branchAddr/8); - pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); - if (pBlock) { xJMP(pBlock->x86ptrStart); } - else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); } - return thisPtr; - case 9: case 10: // JR/JALR - - mVUprint("mVUcompile JR/JALR"); - incPC(-3); // Go back to jump opcode - - if (mVUlow.constJump.isValid) { - if (mVUup.eBit) { // E-bit Jump - iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1); - mVUendProgram(mVU, 1, xStatus, xMac, xClip); - } - else { - int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8); - mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles); - // Check if jump-to-block has already been compiled - blockCreate(jumpAddr/8); - pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs); - if (pBlock) { xJMP(pBlock->x86ptrStart); } - else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); } - } - return thisPtr; - } - - if (mVUup.eBit) { // E-bit Jump - mVUendProgram(mVU, 2, xStatus, xMac, xClip); - MOV32MtoR(gprT1, (uptr)&mVU->branch); - MOV32RtoM((uptr)&mVU->regs->VI[REG_TPC].UL, gprT1); - xJMP(mVU->exitFunct); - return thisPtr; - } - - memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); - mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles); - - mVUbackupRegs(mVU); - MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) - - if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState) - else xCALL(mVUcompileJIT<1>); - mVUrestoreRegs(mVU); - JMPR(gprT1); // Jump to rec-code address - return thisPtr; + case 3: condBranch(mVU, mFC, pBlock, Jcc_Equal); return thisPtr; // IBEQ + case 4: condBranch(mVU, mFC, pBlock, Jcc_GreaterOrEqual); return thisPtr; // IBGEZ + case 5: condBranch(mVU, mFC, pBlock, Jcc_Greater); return thisPtr; // IBGTZ + case 6: condBranch(mVU, mFC, pBlock, Jcc_LessOrEqual); return thisPtr; // IBLEQ + case 7: condBranch(mVU, mFC, pBlock, Jcc_Less); return thisPtr; // IBLTZ + case 8: condBranch(mVU, mFC, pBlock, Jcc_NotEqual); return thisPtr; // IBNEQ + case 1: case 2: normBranch(mVU, mFC); return thisPtr; // B/BAL + case 9: case 10: normJump (mVU, mFC, pBlock); return thisPtr; // JR/JALR } - // Conditional Branches - mVUprint("mVUcompile conditional branch"); - if (mVUup.eBit) return thisPtr; // Handled in Branch Case - if (bBlock) { // Branch non-taken has already been compiled - incPC(-3); // Go back to branch opcode (to get branch imm addr) - - // Check if branch-block has already been compiled - blockCreate(branchAddr/8); - pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); - if (pBlock) { xJMP( pBlock->x86ptrStart ); } - else { mVUblockFetch(mVU, branchAddr, (uptr)&mVUregs); } - } - else { - uptr jumpAddr; - u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs so back them up - memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); - - incPC2(1); // Get PC for branch not-taken - mVUcompile(mVU, xPC, (uptr)&mVUregs); - - iPC = bPC; - incPC(-3); // Go back to branch opcode (to get branch imm addr) - jumpAddr = (uptr)mVUblockFetch(mVU, branchAddr, (uptr)&pBlock->pStateEnd); - *ajmp = (jumpAddr - ((uptr)ajmp + 4)); - } - return thisPtr; } } if (x == endCount) { Console::Error("microVU%d: Possible infinite compiling loop!", params mVU->index); } // E-bit End mVUsetupRange(mVU, xPC-8, 0); - mVUendProgram(mVU, 1, xStatus, xMac, xClip); + mVUendProgram(mVU, &mFC, 1); return thisPtr; } @@ -584,4 +400,3 @@ microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) { microVUx(void*) __fastcall mVUcompileJIT(u32 startPC, uptr pState) { return mVUblockFetch(mVUx, startPC, pState); } - diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 4abc004193..2f83c5e701 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -77,7 +77,7 @@ int sortFlag(int* fFlag, int* bFlag, int cycles) { #define sFlagCond ((sFLAG.doFlag && !mVUsFlagHack) || mVUlow.isFSSET || mVUinfo.doDivFlag) // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! -microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { +microVUt(void) mVUsetFlags(mV, microFlagCycles& mFC) { int endPC = iPC; u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking @@ -95,31 +95,31 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { // Status/Mac Flags Setup Code int xS = 0, xM = 0, xC = 0; for (int i = 0; i < 4; i++) { - xStatus[i] = i; - xMac [i] = i; - xClip [i] = i; + mFC.xStatus[i] = i; + mFC.xMac [i] = i; + mFC.xClip [i] = i; } if (!(mVUpBlock->pState.needExactMatch & 0x00f)) { xS = (mVUpBlock->pState.flags >> 0) & 3; - xStatus[0] = -1; xStatus[1] = -1; - xStatus[2] = -1; xStatus[3] = -1; - xStatus[(xS-1)&3] = 0; + mFC.xStatus[0] = -1; mFC.xStatus[1] = -1; + mFC.xStatus[2] = -1; mFC.xStatus[3] = -1; + mFC.xStatus[(xS-1)&3] = 0; } if (!(mVUpBlock->pState.needExactMatch & 0xf00)) { xC = (mVUpBlock->pState.flags >> 2) & 3; - xClip[0] = -1; xClip[1] = -1; - xClip[2] = -1; xClip[3] = -1; - xClip[(xC-1)&3] = 0; + mFC.xClip[0] = -1; mFC.xClip[1] = -1; + mFC.xClip[2] = -1; mFC.xClip[3] = -1; + mFC.xClip[(xC-1)&3] = 0; } if (!(mVUpBlock->pState.needExactMatch & 0x0f0)) { - xMac[0] = -1; xMac[1] = -1; - xMac[2] = -1; xMac[3] = -1; + mFC.xMac[0] = -1; mFC.xMac[1] = -1; + mFC.xMac[2] = -1; mFC.xMac[3] = -1; } - int cycles = 0; + mFC.cycles = 0; u32 xCount = mVUcount; // Backup count iPC = mVUstartPC; for (mVUcount = 0; mVUcount < xCount; mVUcount++) { @@ -129,11 +129,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { } else mVUstatusFlagOp(mVU); } - cycles += mVUstall; + mFC.cycles += mVUstall; - sFLAG.read = findFlagInst(xStatus, cycles); - mFLAG.read = findFlagInst(xMac, cycles); - cFLAG.read = findFlagInst(xClip, cycles); + sFLAG.read = findFlagInst(mFC.xStatus, mFC.cycles); + mFLAG.read = findFlagInst(mFC.xMac, mFC.cycles); + cFLAG.read = findFlagInst(mFC.xClip, mFC.cycles); sFLAG.write = xS; mFLAG.write = xM; @@ -143,17 +143,16 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { mFLAG.lastWrite = (xM-1) & 3; cFLAG.lastWrite = (xC-1) & 3; - if (sFlagCond) { xStatus[xS] = cycles + 4; xS = (xS+1) & 3; } - if (mFLAG.doFlag) { xMac [xM] = cycles + 4; xM = (xM+1) & 3; } - if (cFLAG.doFlag) { xClip [xC] = cycles + 4; xC = (xC+1) & 3; } + if (sFlagCond) { mFC.xStatus[xS] = mFC.cycles + 4; xS = (xS+1) & 3; } + if (mFLAG.doFlag) { mFC.xMac [xM] = mFC.cycles + 4; xM = (xM+1) & 3; } + if (cFLAG.doFlag) { mFC.xClip [xC] = mFC.cycles + 4; xC = (xC+1) & 3; } - cycles++; + mFC.cycles++; incPC2(2); } mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS); iPC = endPC; - return cycles; } #define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0))) @@ -164,11 +163,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { #define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0]) // Recompiles Code for Proper Flags on Block Linkings -microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles) { +microVUt(void) mVUsetupFlags(mV, microFlagCycles& mFC) { if (__Status) { int bStatus[4]; - int sortRegs = sortFlag(xStatus, bStatus, cycles); + int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles); // DevCon::Status("sortRegs = %d", params sortRegs); // Note: Emitter will optimize out mov(reg1, reg1) cases... if (sortRegs == 1) { @@ -207,7 +206,7 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles if (__Mac) { int bMac[4]; - sortFlag(xMac, bMac, cycles); + sortFlag(mFC.xMac, bMac, mFC.cycles); SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->macFlag); SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleMac); SSE_MOVAPS_XMM_to_M128((uptr)mVU->macFlag, xmmT1); @@ -215,7 +214,7 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles if (__Clip) { int bClip[4]; - sortFlag(xClip, bClip, cycles); + sortFlag(mFC.xClip, bClip, mFC.cycles); SSE_MOVAPS_M128_to_XMM(xmmT2, (uptr)mVU->clipFlag); SSE_SHUFPS_XMM_to_XMM (xmmT2, xmmT2, shuffleClip); SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT2); diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index f8faadd5ca..872dce21d9 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -122,6 +122,13 @@ struct microFlagInst { u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) }; +struct microFlagCycles { + int xStatus[4]; + int xMac[4]; + int xClip[4]; + int cycles; +}; + struct microOp { u8 stall; // Info on how much current instruction stalled bool isEOB; // Cur Instruction is last instruction in block (End of Block)