- Cleaned up microVU_Compile
- Added file microVU_Branch

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1621 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-08-13 05:04:04 +00:00
parent ab010b2b4c
commit 18a12b10b0
6 changed files with 338 additions and 311 deletions

View File

@ -2076,6 +2076,10 @@
RelativePath="..\..\x86\microVU_Analyze.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Branch.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Compile.inl"
>

View File

@ -206,5 +206,6 @@ typedef void (__fastcall *mVUrecCall)(u32, u32);
#include "microVU_Lower.inl"
#include "microVU_Tables.inl"
#include "microVU_Flags.inl"
#include "microVU_Branch.inl"
#include "microVU_Compile.inl"
#include "microVU_Execute.inl"

View File

@ -0,0 +1,201 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
microVUt(void) mVUincCycles(mV, int x);
microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState);
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); }
#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3))
#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3))
microVUt(void) mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
int fStatus = (isEbit) ? findFlagInst(mFC->xStatus, 0x7fffffff) : sI;
int fMac = (isEbit) ? findFlagInst(mFC->xMac, 0x7fffffff) : 0;
int fClip = (isEbit) ? findFlagInst(mFC->xClip, 0x7fffffff) : cI;
int qInst = 0;
int pInst = 0;
mVU->regAlloc->flushAll();
if (isEbit) {
mVUprint("mVUcompile ebit");
memset(&mVUinfo, 0, sizeof(mVUinfo));
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp));
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100;
qInst = mVU->q;
pInst = mVU->p;
if (mVUinfo.doDivFlag) {
sFLAG.doFlag = 1;
sFLAG.write = fStatus;
mVUdivSet(mVU);
}
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
}
// Save P/Q Regs
if (qInst) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ);
if (isVU1) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, pInst ? 3 : 2);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
}
// Save Flag Instances
mVUallocSFLAGc(gprT1, gprT2, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1);
mVUallocMFLAGa(mVU, gprT1, fMac);
mVUallocCFLAGa(mVU, gprT2, fClip);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
if (isEbit || isVU1) { // Clear 'is busy' Flags
AND32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
}
if (isEbit != 2) { // Save PC, and Jump to Exit Point
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
}
}
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) {
mVUprint("mVUsetupBranch");
// Flush Allocated Regs
mVU->regAlloc->flushAll();
// Shuffle Flag Instances
mVUsetupFlags(mVU, mFC);
// Shuffle P/Q regs since every block starts at instance #0
if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); }
}
void condBranch(mV, microFlagCycles& mFC, microBlock* &pBlock, int JMPcc) {
using namespace x86Emitter;
mVUsetupBranch(mVU, mFC);
xCMP(ptr16[&mVU->branch], 0);
if (mVUup.eBit) { // Conditional Branch With E-Bit Set
mVUendProgram(mVU, &mFC, 2);
xForwardJump8 eJMP((JccComparisonType)JMPcc);
incPC(1); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC);
xJMP(mVU->exitFunct);
eJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr/4;
xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC);
xJMP(mVU->exitFunct);
return;
}
else { // Normal Conditional Branch
microBlock* bBlock;
incPC2(1); // Check if Branch Non-Taken Side has already been recompiled
blockCreate(iPC/2);
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs);
incPC2(-1);
if (bBlock) { // Branch non-taken has already been compiled
xJcc( xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart );
// Check if branch-block has already been compiled
incPC(-3); // Go back to branch opcode (to get branch imm addr)
blockCreate(branchAddr/8);
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP( pBlock->x86ptrStart ); }
else { mVUblockFetch(mVU, branchAddr, (uptr)&mVUregs); }
}
else {
s32* ajmp = xJcc32((JccComparisonType)JMPcc);
uptr jumpAddr;
u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs so back them up
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
incPC2(1); // Get PC for branch not-taken
mVUcompile(mVU, xPC, (uptr)&mVUregs);
iPC = bPC;
incPC(-3); // Go back to branch opcode (to get branch imm addr)
jumpAddr = (uptr)mVUblockFetch(mVU, branchAddr, (uptr)&pBlock->pStateEnd);
*ajmp = (jumpAddr - ((uptr)ajmp + 4));
}
}
}
void normBranch(mV, microFlagCycles& mFC) {
using namespace x86Emitter;
microBlock* pBlock;
incPC(-3); // Go back to branch opcode (to get branch imm addr)
// E-bit Branch
if (mVUup.eBit) { iPC = branchAddr/4; mVUendProgram(mVU, &mFC, 1); return; }
mVUsetupBranch(mVU, mFC);
// Check if branch-block has already been compiled
blockCreate(branchAddr/8);
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
}
void normJump(mV, microFlagCycles& mFC, microBlock* &pBlock) {
using namespace x86Emitter;
mVUprint("mVUcompile JR/JALR");
incPC(-3); // Go back to jump opcode
if (mVUlow.constJump.isValid) {
if (mVUup.eBit) { // E-bit Jump
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
mVUendProgram(mVU, &mFC, 1);
}
else {
int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8);
mVUsetupBranch(mVU, mFC);
// Check if jump-to-block has already been compiled
blockCreate(jumpAddr/8);
pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); }
}
return;
}
if (mVUup.eBit) { // E-bit Jump
mVUendProgram(mVU, &mFC, 2);
MOV32MtoR(gprT1, (uptr)&mVU->branch);
MOV32RtoM((uptr)&mVU->regs->VI[REG_TPC].UL, gprT1);
xJMP(mVU->exitFunct);
return;
}
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
mVUsetupBranch(mVU, mFC);
mVUbackupRegs(mVU);
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState)
else xCALL(mVUcompileJIT<1>);
mVUrestoreRegs(mVU);
JMPR(gprT1); // Jump to rec-code address
}

View File

@ -22,23 +22,6 @@
// Helper Macros
//------------------------------------------------------------------
#define branchCase(JMPcond) branchCaseFunct(mVU, bBlock, xStatus, xMac, xClip, xCycles, ajmp, JMPcond); break
#define branchWarning() { \
if (mVUbranch) { \
Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params mVU->index, xPC); \
mVUlow.isNOP = 1; \
} \
}
#define startLoop() { \
if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \
if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \
if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \
memset(&mVUinfo, 0, sizeof(mVUinfo)); \
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \
}
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
#define tCycles(dest, src) { dest = aMax(dest, src); }
@ -46,52 +29,11 @@
#define incQ() { mVU->q = (mVU->q+1) & 1; }
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); }
//------------------------------------------------------------------
// Helper Functions
//------------------------------------------------------------------
microVUt(void) doSwapOp(mV) {
if (mVUinfo.backupVF && !mVUlow.noWriteVF) {
DevCon::Status("microVU%d: Backing Up VF Reg [%04x]", params getIndex, xPC);
int t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
int t2 = mVU->regAlloc->allocReg();
SSE_MOVAPS_XMM_to_XMM(t2, t1);
mVU->regAlloc->clearNeeded(t1);
mVUopL(mVU, 1);
t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
SSE_XORPS_XMM_to_XMM(t2, t1);
SSE_XORPS_XMM_to_XMM(t1, t2);
SSE_XORPS_XMM_to_XMM(t2, t1);
mVU->regAlloc->clearNeeded(t1);
incPC(1);
doUpperOp();
t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
SSE_MOVAPS_XMM_to_XMM(t1, t2);
mVU->regAlloc->clearNeeded(t1);
mVU->regAlloc->clearNeeded(t2);
}
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
}
microVUt(void) doIbit(microVU* mVU) {
if (mVUup.iBit) {
incPC(-1);
u32 tempI;
mVU->regAlloc->clearRegVF(33);
if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) {
Console::Status("microVU%d: Clamping I Reg", params mVU->index);
tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
}
else tempI = curI;
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
incPC(1);
}
}
// Used by mVUsetupRange
microVUt(void) mVUcheckIsSame(mV) {
@ -167,6 +109,61 @@ microVUt(void) mVUsetupRange(mV, s32 pc, bool isStartPC) {
}
}
microVUt(void) startLoop(mV) {
if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); }
if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); }
if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); }
memset(&mVUinfo, 0, sizeof(mVUinfo));
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp));
}
microVUt(void) doIbit(mV) {
if (mVUup.iBit) {
incPC(-1);
u32 tempI;
mVU->regAlloc->clearRegVF(33);
if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) {
Console::Status("microVU%d: Clamping I Reg", params mVU->index);
tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
}
else tempI = curI;
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
incPC(1);
}
}
microVUt(void) doSwapOp(mV) {
if (mVUinfo.backupVF && !mVUlow.noWriteVF) {
DevCon::Status("microVU%d: Backing Up VF Reg [%04x]", params getIndex, xPC);
int t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
int t2 = mVU->regAlloc->allocReg();
SSE_MOVAPS_XMM_to_XMM(t2, t1);
mVU->regAlloc->clearNeeded(t1);
mVUopL(mVU, 1);
t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
SSE_XORPS_XMM_to_XMM(t2, t1);
SSE_XORPS_XMM_to_XMM(t1, t2);
SSE_XORPS_XMM_to_XMM(t2, t1);
mVU->regAlloc->clearNeeded(t1);
incPC(1);
doUpperOp();
t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
SSE_MOVAPS_XMM_to_XMM(t1, t2);
mVU->regAlloc->clearNeeded(t1);
mVU->regAlloc->clearNeeded(t2);
}
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
}
microVUt(void) branchWarning(mV) {
if (mVUbranch) {
Console::Error("microVU%d Warning: Branch in E-bit/Branch delay slot! [%04x]", params mVU->index, xPC);
mVUlow.isNOP = 1;
}
}
// Optimizes the End Pipeline State Removing Unnecessary Info
microVUt(void) mVUoptimizePipeState(mV) {
for (int i = 0; i < 32; i++) {
@ -183,20 +180,6 @@ microVUt(void) mVUoptimizePipeState(mV) {
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
}
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
microVUt(void) mVUsetupBranch(mV, int* xStatus, int* xMac, int* xClip, int xCycles) {
mVUprint("mVUsetupBranch");
// Flush Allocated Regs
mVU->regAlloc->flushAll();
// Shuffle Flag Instances
mVUsetupFlags(mVU, xStatus, xMac, xClip, xCycles);
// Shuffle P/Q regs since every block starts at instance #0
if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); }
}
microVUt(void) mVUincCycles(mV, int x) {
mVUcycles += x;
for (int z = 31; z > 0; z--) {
@ -269,87 +252,6 @@ microVUt(void) mVUsetCycles(mV) {
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
}
#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3))
#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3))
microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip) {
int fStatus = (isEbit) ? findFlagInst(xStatus, 0x7fffffff) : sI;
int fMac = (isEbit) ? findFlagInst(xMac, 0x7fffffff) : 0;
int fClip = (isEbit) ? findFlagInst(xClip, 0x7fffffff) : cI;
int qInst = 0;
int pInst = 0;
mVU->regAlloc->flushAll();
if (isEbit) {
mVUprint("mVUcompile ebit");
memset(&mVUinfo, 0, sizeof(mVUinfo));
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp));
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100;
qInst = mVU->q;
pInst = mVU->p;
if (mVUinfo.doDivFlag) {
sFLAG.doFlag = 1;
sFLAG.write = fStatus;
mVUdivSet(mVU);
}
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
}
// Save P/Q Regs
if (qInst) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ);
if (isVU1) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, pInst ? 3 : 2);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
}
// Save Flag Instances
mVUallocSFLAGc(gprT1, gprT2, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1);
mVUallocMFLAGa(mVU, gprT1, fMac);
mVUallocCFLAGa(mVU, gprT2, fClip);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
if (isEbit || isVU1) { // Clear 'is busy' Flags
AND32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
}
if (isEbit != 2) { // Save PC, and Jump to Exit Point
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
}
}
void branchCaseFunct(mV, microBlock* &bBlock, int* xStatus, int* xMac, int* xClip, int &xCycles, s32* &ajmp, int JMPcc) {
using namespace x86Emitter;
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
xCMP(ptr16[&mVU->branch], 0);
if (mVUup.eBit) { // Conditional Branch With E-Bit Set
mVUendProgram(mVU, 2, xStatus, xMac, xClip);
xForwardJump8 eJMP((JccComparisonType)JMPcc);
incPC(1); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC);
xJMP(mVU->exitFunct);
eJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr/4;
xMOV(ptr32[&mVU->regs->VI[REG_TPC].UL], xPC);
xJMP(mVU->exitFunct);
}
else { // Normal Conditional Branch
incPC2(1); // Check if Branch Non-Taken Side has already been recompiled
blockCreate(iPC/2);
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs);
incPC2(-1);
if (bBlock) { xJcc( xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart ); }
else { ajmp = xJcc32((JccComparisonType)JMPcc); }
}
}
void __fastcall mVUwarning0(u32 PC) { Console::Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x]", params PC); }
void __fastcall mVUwarning1(u32 PC) { Console::Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x]", params PC); }
void __fastcall mVUprintPC1(u32 PC) { Console::Write("Block PC [%04x] ", params PC); }
@ -365,12 +267,13 @@ microVUt(void) mVUtestCycles(mV) {
if (isVU1) CALLFunc((uptr)mVUwarning1);
//else CALLFunc((uptr)mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
MOV32ItoR(gprR, Roffset); // Restore gprR
mVUendProgram(mVU, 0, NULL, NULL, NULL);
mVUendProgram(mVU, NULL, 0);
x86SetJ32(jmp32);
}
}
microVUt(void) mVUinitConstValues(mV) {
// Initialize VI Constants (vi15 propagates through blocks)
microVUt(void) mVUinitConstValues(microVU* mVU) {
for (int i = 0; i < 16; i++) {
mVUconstReg[i].isValid = 0;
mVUconstReg[i].regValue = 0;
@ -379,6 +282,24 @@ microVUt(void) mVUinitConstValues(mV) {
mVUconstReg[15].regValue = mVUconstReg[15].isValid ? (mVUregs.vi15&0xffff) : 0;
}
// Initialize Variables
microVUt(void) mVUinitFirstPass(microVU* mVU, microBlock* &pBlock, uptr pState, u8* thisPtr) {
mVUstartPC = iPC; // Block Start PC
mVUbranch = 0; // Branch Type
mVUcount = 0; // Number of instructions ran
mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage
mVU->p = 0; // All blocks start at p index #0
mVU->q = 0; // All blocks start at q index #0
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
mVUblock.x86ptrStart = thisPtr;
pBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager
mVUpBlock = pBlock;
mVUregs.flags = 0;
mVUflagInfo = 0;
mVUsFlagHack = CHECK_VU_FLAGHACK;
mVUinitConstValues(mVU);
}
//------------------------------------------------------------------
// Recompiler
//------------------------------------------------------------------
@ -386,38 +307,20 @@ microVUt(void) mVUinitConstValues(mV) {
microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
using namespace x86Emitter;
microFlagCycles mFC;
microBlock* pBlock = NULL;
u8* thisPtr = x86Ptr;
const u32 endCount = (mVU->microMemSize / 8) - 1;
// Setup Program Bounds/Range
mVUsetupRange(mVU, startPC, 1);
// Reset regAlloc
mVU->regAlloc->reset();
// First Pass
iPC = startPC / 4;
setCode();
mVUbranch = 0;
mVUstartPC = iPC;
mVUcount = 0;
mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage
mVU->p = 0; // All blocks start at p index #0
mVU->q = 0; // All blocks start at q index #0
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
mVUblock.x86ptrStart = thisPtr;
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
mVUpBlock = pBlock;
mVUregs.flags = 0;
mVUflagInfo = 0;
mVUsFlagHack = CHECK_VU_FLAGHACK;
mVUinitConstValues(mVU);
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
mVU->regAlloc->reset(); // Reset regAlloc
mVUinitFirstPass(mVU, pBlock, pState, thisPtr);
for (int branch = 0; mVUcount < endCount; mVUcount++) {
incPC(1);
startLoop();
startLoop(mVU);
mVUincCycles(mVU, 1);
mVUopU(mVU, 0);
if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; }
@ -430,19 +333,16 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
mVUinfo.writeQ = !mVU->q;
mVUinfo.readP = mVU->p;
mVUinfo.writeP = !mVU->p;
if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(); break; }
if (branch >= 2) { mVUinfo.isEOB = 1; if (branch == 3) { mVUinfo.isBdelay = 1; } mVUcount++; branchWarning(mVU); break; }
else if (branch == 1) { branch = 2; }
if (mVUbranch) { mVUsetFlagInfo(mVU); branch = 3; mVUbranch = 0; }
incPC(1);
}
// Sets Up Flag instances
int xStatus[4], xMac[4], xClip[4];
int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip);
// Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary
@ -450,8 +350,8 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
iPC = mVUstartPC;
setCode();
mVUbranch = 0;
uint x;
for (x = 0; x < endCount; x++) {
u32 x = 0;
for (; x < endCount; x++) {
if (mVUinfo.isEOB) { x = 0xffff; }
if (mVUup.mBit) { OR32ItoM((uptr)&mVU->regs->flags, VUFLAG_MFLAGSET); }
if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); }
@ -459,111 +359,27 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
else { doSwapOp(mVU); }
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
if (!doRegAlloc) { mVU->regAlloc->flushAll(); }
if (!mVUinfo.isBdelay) { incPC(1); }
else {
microBlock* bBlock = NULL;
s32* ajmp = 0;
mVUsetupRange(mVU, xPC, 0);
mVUdebugNOW(1);
switch (mVUbranch) {
case 3: branchCase(Jcc_Equal); // IBEQ
case 4: branchCase(Jcc_GreaterOrEqual); // IBGEZ
case 5: branchCase(Jcc_Greater); // IBGTZ
case 6: branchCase(Jcc_LessOrEqual); // IBLEQ
case 7: branchCase(Jcc_Less); // IBLTZ
case 8: branchCase(Jcc_NotEqual); // IBNEQ
case 1: case 2: // B/BAL
mVUprint("mVUcompile B/BAL");
incPC(-3); // Go back to branch opcode (to get branch imm addr)
if (mVUup.eBit) { iPC = branchAddr/4; mVUendProgram(mVU, 1, xStatus, xMac, xClip); } // E-bit Branch
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
// Check if branch-block has already been compiled
blockCreate(branchAddr/8);
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
return thisPtr;
case 9: case 10: // JR/JALR
mVUprint("mVUcompile JR/JALR");
incPC(-3); // Go back to jump opcode
if (mVUlow.constJump.isValid) {
if (mVUup.eBit) { // E-bit Jump
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
mVUendProgram(mVU, 1, xStatus, xMac, xClip);
case 3: condBranch(mVU, mFC, pBlock, Jcc_Equal); return thisPtr; // IBEQ
case 4: condBranch(mVU, mFC, pBlock, Jcc_GreaterOrEqual); return thisPtr; // IBGEZ
case 5: condBranch(mVU, mFC, pBlock, Jcc_Greater); return thisPtr; // IBGTZ
case 6: condBranch(mVU, mFC, pBlock, Jcc_LessOrEqual); return thisPtr; // IBLEQ
case 7: condBranch(mVU, mFC, pBlock, Jcc_Less); return thisPtr; // IBLTZ
case 8: condBranch(mVU, mFC, pBlock, Jcc_NotEqual); return thisPtr; // IBNEQ
case 1: case 2: normBranch(mVU, mFC); return thisPtr; // B/BAL
case 9: case 10: normJump (mVU, mFC, pBlock); return thisPtr; // JR/JALR
}
else {
int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8);
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
// Check if jump-to-block has already been compiled
blockCreate(jumpAddr/8);
pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); }
}
return thisPtr;
}
if (mVUup.eBit) { // E-bit Jump
mVUendProgram(mVU, 2, xStatus, xMac, xClip);
MOV32MtoR(gprT1, (uptr)&mVU->branch);
MOV32RtoM((uptr)&mVU->regs->VI[REG_TPC].UL, gprT1);
xJMP(mVU->exitFunct);
return thisPtr;
}
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
mVUbackupRegs(mVU);
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
if (!mVU->index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState)
else xCALL(mVUcompileJIT<1>);
mVUrestoreRegs(mVU);
JMPR(gprT1); // Jump to rec-code address
return thisPtr;
}
// Conditional Branches
mVUprint("mVUcompile conditional branch");
if (mVUup.eBit) return thisPtr; // Handled in Branch Case
if (bBlock) { // Branch non-taken has already been compiled
incPC(-3); // Go back to branch opcode (to get branch imm addr)
// Check if branch-block has already been compiled
blockCreate(branchAddr/8);
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP( pBlock->x86ptrStart ); }
else { mVUblockFetch(mVU, branchAddr, (uptr)&mVUregs); }
}
else {
uptr jumpAddr;
u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs so back them up
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
incPC2(1); // Get PC for branch not-taken
mVUcompile(mVU, xPC, (uptr)&mVUregs);
iPC = bPC;
incPC(-3); // Go back to branch opcode (to get branch imm addr)
jumpAddr = (uptr)mVUblockFetch(mVU, branchAddr, (uptr)&pBlock->pStateEnd);
*ajmp = (jumpAddr - ((uptr)ajmp + 4));
}
return thisPtr;
}
}
if (x == endCount) { Console::Error("microVU%d: Possible infinite compiling loop!", params mVU->index); }
// E-bit End
mVUsetupRange(mVU, xPC-8, 0);
mVUendProgram(mVU, 1, xStatus, xMac, xClip);
mVUendProgram(mVU, &mFC, 1);
return thisPtr;
}
@ -584,4 +400,3 @@ microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) {
microVUx(void*) __fastcall mVUcompileJIT(u32 startPC, uptr pState) {
return mVUblockFetch(mVUx, startPC, pState);
}

View File

@ -77,7 +77,7 @@ int sortFlag(int* fFlag, int* bFlag, int cycles) {
#define sFlagCond ((sFLAG.doFlag && !mVUsFlagHack) || mVUlow.isFSSET || mVUinfo.doDivFlag)
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
microVUt(void) mVUsetFlags(mV, microFlagCycles& mFC) {
int endPC = iPC;
u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking
@ -95,31 +95,31 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
// Status/Mac Flags Setup Code
int xS = 0, xM = 0, xC = 0;
for (int i = 0; i < 4; i++) {
xStatus[i] = i;
xMac [i] = i;
xClip [i] = i;
mFC.xStatus[i] = i;
mFC.xMac [i] = i;
mFC.xClip [i] = i;
}
if (!(mVUpBlock->pState.needExactMatch & 0x00f)) {
xS = (mVUpBlock->pState.flags >> 0) & 3;
xStatus[0] = -1; xStatus[1] = -1;
xStatus[2] = -1; xStatus[3] = -1;
xStatus[(xS-1)&3] = 0;
mFC.xStatus[0] = -1; mFC.xStatus[1] = -1;
mFC.xStatus[2] = -1; mFC.xStatus[3] = -1;
mFC.xStatus[(xS-1)&3] = 0;
}
if (!(mVUpBlock->pState.needExactMatch & 0xf00)) {
xC = (mVUpBlock->pState.flags >> 2) & 3;
xClip[0] = -1; xClip[1] = -1;
xClip[2] = -1; xClip[3] = -1;
xClip[(xC-1)&3] = 0;
mFC.xClip[0] = -1; mFC.xClip[1] = -1;
mFC.xClip[2] = -1; mFC.xClip[3] = -1;
mFC.xClip[(xC-1)&3] = 0;
}
if (!(mVUpBlock->pState.needExactMatch & 0x0f0)) {
xMac[0] = -1; xMac[1] = -1;
xMac[2] = -1; xMac[3] = -1;
mFC.xMac[0] = -1; mFC.xMac[1] = -1;
mFC.xMac[2] = -1; mFC.xMac[3] = -1;
}
int cycles = 0;
mFC.cycles = 0;
u32 xCount = mVUcount; // Backup count
iPC = mVUstartPC;
for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
@ -129,11 +129,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
}
else mVUstatusFlagOp(mVU);
}
cycles += mVUstall;
mFC.cycles += mVUstall;
sFLAG.read = findFlagInst(xStatus, cycles);
mFLAG.read = findFlagInst(xMac, cycles);
cFLAG.read = findFlagInst(xClip, cycles);
sFLAG.read = findFlagInst(mFC.xStatus, mFC.cycles);
mFLAG.read = findFlagInst(mFC.xMac, mFC.cycles);
cFLAG.read = findFlagInst(mFC.xClip, mFC.cycles);
sFLAG.write = xS;
mFLAG.write = xM;
@ -143,17 +143,16 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
mFLAG.lastWrite = (xM-1) & 3;
cFLAG.lastWrite = (xC-1) & 3;
if (sFlagCond) { xStatus[xS] = cycles + 4; xS = (xS+1) & 3; }
if (mFLAG.doFlag) { xMac [xM] = cycles + 4; xM = (xM+1) & 3; }
if (cFLAG.doFlag) { xClip [xC] = cycles + 4; xC = (xC+1) & 3; }
if (sFlagCond) { mFC.xStatus[xS] = mFC.cycles + 4; xS = (xS+1) & 3; }
if (mFLAG.doFlag) { mFC.xMac [xM] = mFC.cycles + 4; xM = (xM+1) & 3; }
if (cFLAG.doFlag) { mFC.xClip [xC] = mFC.cycles + 4; xC = (xC+1) & 3; }
cycles++;
mFC.cycles++;
incPC2(2);
}
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
iPC = endPC;
return cycles;
}
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
@ -164,11 +163,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0])
// Recompiles Code for Proper Flags on Block Linkings
microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles) {
microVUt(void) mVUsetupFlags(mV, microFlagCycles& mFC) {
if (__Status) {
int bStatus[4];
int sortRegs = sortFlag(xStatus, bStatus, cycles);
int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles);
// DevCon::Status("sortRegs = %d", params sortRegs);
// Note: Emitter will optimize out mov(reg1, reg1) cases...
if (sortRegs == 1) {
@ -207,7 +206,7 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles
if (__Mac) {
int bMac[4];
sortFlag(xMac, bMac, cycles);
sortFlag(mFC.xMac, bMac, mFC.cycles);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->macFlag);
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleMac);
SSE_MOVAPS_XMM_to_M128((uptr)mVU->macFlag, xmmT1);
@ -215,7 +214,7 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles
if (__Clip) {
int bClip[4];
sortFlag(xClip, bClip, cycles);
sortFlag(mFC.xClip, bClip, mFC.cycles);
SSE_MOVAPS_M128_to_XMM(xmmT2, (uptr)mVU->clipFlag);
SSE_SHUFPS_XMM_to_XMM (xmmT2, xmmT2, shuffleClip);
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT2);

View File

@ -122,6 +122,13 @@ struct microFlagInst {
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
};
struct microFlagCycles {
int xStatus[4];
int xMac[4];
int xClip[4];
int cycles;
};
struct microOp {
u8 stall; // Info on how much current instruction stalled
bool isEOB; // Cur Instruction is last instruction in block (End of Block)