mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Major flag algorithm rewrites (work in progress) - Abused macros to create my tables, knocking off 800 lines of code. - VU0 and VU1 have different dynarec cache sizes now (vu0's is 1/4 the size of vu1's) - Minor changes... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1166 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
25090899ce
commit
d19229d334
|
@ -43,6 +43,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
|
|||
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
|
||||
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
|
||||
mVU->cache = NULL;
|
||||
mVU->cacheSize = mVUcacheSize;
|
||||
memset(&mVU->prog, 0, sizeof(mVU->prog));
|
||||
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
|
||||
|
||||
|
|
|
@ -49,15 +49,15 @@ public:
|
|||
}
|
||||
microBlock* search(microRegInfo* pState) {
|
||||
if (listSize < 0) return NULL;
|
||||
if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
for (int i = 0; i <= listSize; i++) {
|
||||
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo) - 1)) return &blockList[i];
|
||||
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo)/* - 4*/)) return &blockList[i];
|
||||
}
|
||||
}
|
||||
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
|
||||
for (int i = 0; i <= listSize; i++) {
|
||||
if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)
|
||||
&& (blockList[i].pState.clip == pState->clip)) { return &blockList[i]; }
|
||||
&& (blockList[i].pState.flags == pState->flags)) { return &blockList[i]; }
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
@ -87,11 +87,12 @@ struct microProgManager {
|
|||
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
|
||||
};
|
||||
|
||||
#define mVUcacheSize (0x1f00000 / ((vuIndex) ? 1 : 4))
|
||||
struct microVU {
|
||||
u32 index; // VU Index (VU0 or VU1)
|
||||
u32 microSize; // VU Micro Memory Size
|
||||
u32 progSize; // VU Micro Program Size (microSize/4)
|
||||
static const u32 cacheSize = 0xb00000; // VU Cache Size
|
||||
u32 cacheSize; // VU Cache Size
|
||||
|
||||
microProgManager<0x4000> prog; // Micro Program Data
|
||||
|
||||
|
|
|
@ -39,8 +39,8 @@ struct microRegInfo {
|
|||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 clip;
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last byte in struct)
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last 2 bytes in struct)
|
||||
#if defined(_MSC_VER)
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
@ -75,6 +75,7 @@ struct microAllocInfo {
|
|||
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
||||
u32 curPC; // Current PC
|
||||
u32 startPC; // Start PC for Cur Block
|
||||
u32 flagInfo; // Holds information to help with flag instances on block linking
|
||||
u32 info[pSize/8]; // Info for Instructions in current block
|
||||
u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
|
||||
};
|
||||
|
|
|
@ -249,9 +249,9 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
|
|||
microVUt(void) mVUanalyzeSflag(int It) {
|
||||
microVU* mVU = mVUx;
|
||||
if (!It) { mVUinfo |= _isNOP; }
|
||||
else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from
|
||||
else {
|
||||
mVUinfo |= _swapOps;
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf /*<< mVUcount*/; }
|
||||
if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); }
|
||||
// Note: _isSflag is used for status flag optimizations.
|
||||
// Due to stalls, it can only be set one instruction prior to the status flag read instruction
|
||||
|
@ -279,7 +279,7 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) {
|
|||
if (!It) { mVUinfo |= _isNOP; }
|
||||
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
|
||||
mVUinfo |= _swapOps;
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); }
|
||||
int curPC = iPC;
|
||||
for (int i = mVUcount, j = 0; i > 1; i--, j++) {
|
||||
incPC2(-2);
|
||||
|
@ -297,7 +297,8 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) {
|
|||
|
||||
microVUt(void) mVUanalyzeCflag() {
|
||||
microVU* mVU = mVUx;
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
|
||||
mVUinfo |= _swapOps;
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 8); }
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
|
|
@ -24,8 +24,7 @@
|
|||
//------------------------------------------------------------------
|
||||
|
||||
#define branchCase(JMPcc, nJMPcc) \
|
||||
mVUsetupBranch<vuIndex>(bStatus, bMac); \
|
||||
mVUprint("mVUcompile branchCase"); \
|
||||
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles); \
|
||||
CMP16ItoM((uptr)&mVU->branch, 0); \
|
||||
incPC2(1); \
|
||||
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
|
||||
|
@ -46,12 +45,12 @@
|
|||
//------------------------------------------------------------------
|
||||
|
||||
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
|
||||
microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
|
||||
microVUt(void) mVUsetupBranch(int* xStatus, int* xMac, int* xClip, int xCycles) {
|
||||
microVU* mVU = mVUx;
|
||||
mVUprint("mVUsetupBranch");
|
||||
|
||||
// Shuffle Flag Instances
|
||||
mVUsetupFlags<vuIndex>(bStatus, bMac);
|
||||
mVUsetupFlags<vuIndex>(xStatus, xMac, xClip, xCycles);
|
||||
|
||||
// Shuffle P/Q regs since every block starts at instance #0
|
||||
if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); }
|
||||
|
@ -105,7 +104,7 @@ microVUt(void) mVUsetCycles() {
|
|||
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
|
||||
}
|
||||
|
||||
microVUt(void) mVUendProgram(int fStatus, int fMac) {
|
||||
microVUt(void) mVUendProgram(int fStatus, int fMac, int fClip) {
|
||||
microVU* mVU = mVUx;
|
||||
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
|
||||
mVUcycles -= 100;
|
||||
|
@ -121,8 +120,10 @@ microVUt(void) mVUendProgram(int fStatus, int fMac) {
|
|||
// Save Flag Instances
|
||||
getFlagReg(fStatus, fStatus);
|
||||
mVUallocMFLAGa<vuIndex>(gprT1, fMac);
|
||||
mVUallocCFLAGa<vuIndex>(gprT2, fClip);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
|
||||
|
||||
//memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
|
||||
//MOV32ItoM((uptr)&mVU->prog.lpState, (int)&mVUblock.pState); // Save pipeline state (clipflag instance)
|
||||
|
@ -133,12 +134,15 @@ microVUt(void) mVUendProgram(int fStatus, int fMac) {
|
|||
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
|
||||
}
|
||||
|
||||
#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3))
|
||||
#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3))
|
||||
|
||||
microVUt(void) mVUtestCycles() {
|
||||
microVU* mVU = mVUx;
|
||||
iPC = mVUstartPC;
|
||||
CMP32ItoM((uptr)&mVU->cycles, 0);
|
||||
u8* jmp8 = JG8(0);
|
||||
mVUendProgram<vuIndex>(0, 0);
|
||||
mVUendProgram<vuIndex>(sI, 0, cI);
|
||||
x86SetJ8(jmp8);
|
||||
SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
|
||||
}
|
||||
|
@ -171,8 +175,10 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
mVUblock.x86ptrStart = thisPtr;
|
||||
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
|
||||
mVUpBlock = pBlock;
|
||||
mVUregs.flags = 0;
|
||||
mVUflagInfo = 0;
|
||||
|
||||
for (int branch = 0;; ) {
|
||||
for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) {
|
||||
incPC(1);
|
||||
mVUinfo = 0;
|
||||
incCycles(1);
|
||||
|
@ -186,20 +192,21 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
if (mVU->p) { mVUinfo |= _readP; }
|
||||
if (mVU->q) { mVUinfo |= _readQ; }
|
||||
else { mVUinfo |= _writeQ; }
|
||||
if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; }
|
||||
if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); mVUcount++; if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; }
|
||||
else if (branch == 1) { branch = 2; }
|
||||
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
|
||||
if (mVUbranch) { mVUsetFlagInfo<vuIndex>(); branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
|
||||
incPC(1);
|
||||
mVUcount++;
|
||||
}
|
||||
|
||||
// Sets Up Flag instances
|
||||
int bStatus[4]; int bMac[4];
|
||||
mVUsetFlags<vuIndex>(bStatus, bMac);
|
||||
int xStatus[4], xMac[4], xClip[4];
|
||||
int xCycles = mVUsetFlags<vuIndex>(xStatus, xMac, xClip);
|
||||
mVUtestCycles<vuIndex>();
|
||||
//SysPrintf("bS[0] = %08x, bS[1] = %08x, bS[2] = %08x, bS[3] = %08x\n", bStatus[0], bStatus[1], bStatus[2], bStatus[3]);
|
||||
//SysPrintf("bM[0] = %08x, bM[1] = %08x, bM[2] = %08x, bM[3] = %08x\n", bMac[0], bMac[1], bMac[2], bMac[3]);
|
||||
//SysPrintf("mVUcount = %d\n", mVUcount);
|
||||
//SysPrintf("mVUflagInfo = %d\n", mVUflagInfo);
|
||||
|
||||
// Second Pass
|
||||
iPC = mVUstartPC;
|
||||
|
@ -227,7 +234,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
|
||||
mVUprint("mVUcompile B/BAL");
|
||||
incPC(-3); // Go back to branch opcode (to get branch imm addr)
|
||||
mVUsetupBranch<vuIndex>(bStatus, bMac);
|
||||
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles);
|
||||
|
||||
// Check if branch-block has already been compiled
|
||||
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
|
||||
|
@ -239,7 +246,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
|
||||
mVUprint("mVUcompile JR/JALR");
|
||||
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
|
||||
mVUsetupBranch<vuIndex>(bStatus, bMac);
|
||||
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles);
|
||||
|
||||
mVUbackupRegs<vuIndex>();
|
||||
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
|
||||
|
@ -264,7 +271,8 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
}
|
||||
else {
|
||||
uptr jumpAddr;
|
||||
u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs, so back them up
|
||||
u32 bPC = iPC; // mVUcompile can modify iPC, mVUregs, and mVUflagInfo, so back them up
|
||||
u32 bFlagInfo = mVUflagInfo;
|
||||
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
|
||||
|
||||
incPC2(1); // Get PC for branch not-taken
|
||||
|
@ -272,6 +280,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
else mVUcompileVU1(xPC, (uptr)&mVUregs);
|
||||
|
||||
iPC = bPC;
|
||||
mVUflagInfo = bFlagInfo;
|
||||
incPC(-3); // Go back to branch opcode (to get branch imm addr)
|
||||
if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&pBlock->pStateEnd);
|
||||
else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&pBlock->pStateEnd);
|
||||
|
@ -284,7 +293,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); }
|
||||
|
||||
// Do E-bit end stuff here
|
||||
mVUendProgram<vuIndex>(bStatus[3], bMac[3]);
|
||||
mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff));
|
||||
|
||||
return thisPtr; //ToDo: Save pipeline state?
|
||||
}
|
||||
|
|
|
@ -57,6 +57,10 @@ microVUt(void) mVUdispatcherA() {
|
|||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, xmmT1);
|
||||
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, xmmT1);
|
||||
|
||||
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
|
||||
|
|
|
@ -19,20 +19,6 @@
|
|||
#pragma once
|
||||
#ifdef PCSX2_MICROVU
|
||||
|
||||
// ToDo: Fix this properly.
|
||||
#define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \
|
||||
yF += (mVUstall > 3) ? 3 : mVUstall; \
|
||||
if (yF > zF) { \
|
||||
pFlag += (yF-zF); \
|
||||
if (pFlag >= xFlag) pFlag = (xFlag-1); \
|
||||
zF++; \
|
||||
xF = (yF-zF); \
|
||||
zF = yF; \
|
||||
yF -= xF; \
|
||||
} \
|
||||
yF++; \
|
||||
}
|
||||
|
||||
// Sets FDIV Flags at the proper time
|
||||
microVUt(void) mVUdivSet() {
|
||||
microVU* mVU = mVUx;
|
||||
|
@ -69,77 +55,105 @@ microVUt(void) mVUstatusFlagOp() {
|
|||
iPC = curPC;
|
||||
}
|
||||
|
||||
// Selects one of the 4 flag instances for a given cycle count.
//   fFlag  - array of 4 per-instance values (read only, indices 0..3)
//   cycles - upper bound; only entries with fFlag[i] <= cycles are considered
// Returns the index (0-3) of the largest qualifying entry. Entries <= -1 never
// qualify (jValue starts at -1 and the comparison is strict), ties keep the
// lowest index, and 0 is returned when no entry qualifies.
int findFlagInst(int* fFlag, int cycles) {
|
||||
int j = 0, jValue = -1;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if ((fFlag[i] <= cycles) && (fFlag[i] > jValue)) { j = i; jValue = fFlag[i]; }
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
// Setup Last 4 instances of Status/Mac/Clip flags (needed for accurate block linking)
|
||||
// Fills bFlag[0..3] with the instance index that findFlagInst() selects from
// fFlag for four consecutive cycle counts: cycles, cycles+1, cycles+2, cycles+3.
// fFlag is only read; bFlag must have room for 4 ints.
void sortFlag(int* fFlag, int* bFlag, int cycles) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
bFlag[i] = findFlagInst(fFlag, cycles);
|
||||
cycles++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
|
||||
microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
|
||||
microVUt(int) mVUsetFlags(int* xStatus, int* xMac, int* xClip) {
|
||||
microVU* mVU = mVUx;
|
||||
|
||||
// Ensure last ~4+ instructions update mac flags
|
||||
int endPC = iPC;
|
||||
u32 aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances
|
||||
for (int i = mVUcount, iX = 0; i > 0; i--, aCount++) {
|
||||
if (doStatus) { mVUinfo |= _doMac; iX++; if ((iX >= 4) || (aCount > 4)) { break; } }
|
||||
u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking
|
||||
|
||||
// Ensure last ~4+ instructions update mac flags
|
||||
for (int i = mVUcount; i > 0; i--, aCount++) {
|
||||
if (doStatus) { if (__Mac) { mVUinfo |= _doMac; } if (aCount > 4) { break; } }
|
||||
incPC2(-2);
|
||||
}
|
||||
|
||||
// Status/Mac Flags Setup Code
|
||||
int xStatus = 8, xMac = 8; // Flag Instances start at #0 on every block ((8&3) == 0)
|
||||
int pStatus = 3, pMac = 3;
|
||||
int xClip = mVUregs.clip + 8, pClip = mVUregs.clip + 7; // Clip Instance starts from where it left off
|
||||
int xS = 0, yS = 1, zS = 0;
|
||||
int xM = 0, yM = 1, zM = 0;
|
||||
int xC = 0, yC = 1, zC = 0;
|
||||
int xS = 0, xM = 0, xC = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
xStatus[i] = i;
|
||||
xMac [i] = i;
|
||||
xClip [i] = i;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 0x00f)) {
|
||||
xS = (mVUpBlock->pState.flags >> 0) & 3;
|
||||
xStatus[0] = -1; xStatus[1] = -1;
|
||||
xStatus[2] = -1; xStatus[3] = -1;
|
||||
xStatus[(xS-1)&3] = 0;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 0xf00)) {
|
||||
xC = (mVUpBlock->pState.flags >> 2) & 3;
|
||||
xClip[0] = -1; xClip[1] = -1;
|
||||
xClip[2] = -1; xClip[3] = -1;
|
||||
xClip[(xC-1)&3] = 0;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 0x0f0)) {
|
||||
xMac[0] = -1; xMac[1] = -1;
|
||||
xMac[2] = -1; xMac[3] = -1;
|
||||
}
|
||||
|
||||
int cycles = 0;
|
||||
u32 xCount = mVUcount; // Backup count
|
||||
iPC = mVUstartPC;
|
||||
for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
|
||||
if (((xCount - mVUcount) > aCount) && isFSSET) mVUstatusFlagOp<vuIndex>(); // Don't Optimize out on the last ~4+ instructions
|
||||
if (isFSSET) {
|
||||
if (__Status) { // Don't Optimize out on the last ~4+ instructions
|
||||
if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp<vuIndex>(); }
|
||||
}
|
||||
else mVUstatusFlagOp<vuIndex>();
|
||||
}
|
||||
cycles += mVUstall;
|
||||
|
||||
flagSetMacro(xStatus, pStatus, xS, yS, zS); // Handles _fvsinstances
|
||||
flagSetMacro(xMac, pMac, xM, yM, zM); // Handles _fvminstances
|
||||
flagSetMacro(xClip, pClip, xC, yC, zC); // Handles _fvcinstances
|
||||
mVUinfo |= findFlagInst(xStatus, cycles) << 18; // _fvsInstance
|
||||
mVUinfo |= findFlagInst(xMac, cycles) << 16; // _fvmInstance
|
||||
mVUinfo |= findFlagInst(xClip, cycles) << 20; // _fvcInstance
|
||||
|
||||
mVUinfo |= (xStatus&3) << 12; // _fsInstance
|
||||
mVUinfo |= (xMac&3) << 10; // _fmInstance
|
||||
mVUinfo |= (xClip&3) << 14; // _fcInstance
|
||||
mVUinfo |= (xS & 3) << 12; // _fsInstance
|
||||
mVUinfo |= (xM & 3) << 10; // _fmInstance
|
||||
mVUinfo |= (xC & 3) << 14; // _fcInstance
|
||||
|
||||
mVUinfo |= (pStatus&3) << 18; // _fvsInstance
|
||||
mVUinfo |= (pMac&3) << 16; // _fvmInstance
|
||||
mVUinfo |= (pClip&3) << 20; // _fvcInstance
|
||||
if (doStatus||isFSSET||doDivFlag) { xStatus [xS] = cycles + 4; xS = (xS+1)&3; }
|
||||
if (doMac) { xMac [xM] = cycles + 4; xM = (xM+1)&3; }
|
||||
if (doClip) { xClip [xC] = cycles + 4; xC = (xC+1)&3; }
|
||||
|
||||
if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); }
|
||||
if (doMac) { xMac = (xMac+1); }
|
||||
if (doClip) { xClip = (xClip+1); }
|
||||
cycles++;
|
||||
incPC2(2);
|
||||
}
|
||||
mVUcount = xCount; // Restore count
|
||||
mVUregs.clip = xClip&3; // Note: Clip timing isn't cycle-accurate between block linking; but hopefully doesn't matter
|
||||
|
||||
// Temp Hack-fix until flag-algorithm rewrite
|
||||
for (int i = 0; i < 4; i++) {
|
||||
bStatus[i] = 0;
|
||||
bMac[i] = 0;
|
||||
}
|
||||
|
||||
// Setup Last 4 instances of Status/Mac flags (needed for accurate block linking)
|
||||
iPC = endPC;
|
||||
for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) {
|
||||
if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) {
|
||||
for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; }
|
||||
}
|
||||
if (doMac && (j >= 0)) {
|
||||
for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; }
|
||||
}
|
||||
incPC2(-2);
|
||||
}
|
||||
iPC = endPC;
|
||||
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
|
||||
return cycles;
|
||||
}
|
||||
|
||||
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
|
||||
#define shuffleMac ((bMac[3]<<6)|(bMac[2]<<4)|(bMac[1]<<2)|bMac[0])
|
||||
#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0])
|
||||
#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0])
|
||||
|
||||
// Recompiles Code for Proper Flags on Block Linkings
|
||||
microVUt(void) mVUsetupFlags(int* bStatus, int* bMac) {
|
||||
microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
|
||||
microVU* mVU = mVUx;
|
||||
|
||||
if (__Status) {
|
||||
int bStatus[4];
|
||||
sortFlag(xStatus, bStatus, cycles);
|
||||
PUSH32R(gprR); // Backup gprR
|
||||
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
|
||||
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
|
||||
|
@ -149,10 +163,69 @@ microVUt(void) mVUsetupFlags(int* bStatus, int* bMac) {
|
|||
MOV32RtoR(gprF1, gprT2);
|
||||
MOV32RtoR(gprF2, gprR);
|
||||
POP32R(gprR); // Restore gprR
|
||||
}
|
||||
|
||||
if (__Mac) {
|
||||
int bMac[4];
|
||||
sortFlag(xMac, bMac, cycles);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->macFlag);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleMac);
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->macFlag, xmmT1);
|
||||
}
|
||||
|
||||
if (__Clip) {
|
||||
int bClip[4];
|
||||
sortFlag(xClip, bClip, cycles);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->clipFlag);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleClip);
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
|
||||
}
|
||||
}
|
||||
|
||||
// Scans at most 4 instructions starting at startPC and invokes the lower-opcode
// handler with template argument 3 for each one whose I-bit is clear.
// NOTE(review): argument 3 presumably selects the 'pass4' path (recPass == 3),
// where the flag-reading opcodes OR their usage bits into mVUflagInfo - confirm
// against mVUopL.
// iPC, mVUcount and mVUbranch are saved on entry and restored before returning,
// after which setCode() is called.
microVUt(void) mVUpass4(int startPC) {
|
||||

||||
microVU* mVU = mVUx;
|
||||
int oldPC = iPC;
|
||||
int oldCount = mVUcount;
|
||||
int oldBranch = mVUbranch;
|
||||
iPC = startPC / 4;
|
||||
mVUcount = 0;
|
||||
mVUbranch = 0;
|
||||
for (int branch = 0; mVUcount < 4; mVUcount++) {
|
||||
incPC(1);
|
||||
if ( curI & _Ebit_ ) { branch = 1; } // E-bit: scan one more instruction, then stop
|
||||
if ( curI & _MDTbit_ ) { branch = 2; } // M/D/T bit: stop after this instruction
|
||||
if (!(curI & _Ibit_) ) { incPC(-1); mVUopL<vuIndex, 3>(); incPC(1); }
|
||||
if (branch >= 2) { break; }
|
||||
else if (branch == 1) { branch = 2; }
|
||||
if (mVUbranch) { branch = 3; mVUbranch = 0; } // mVUbranch became non-zero: the loop breaks on the next iteration
|
||||
incPC(1);
|
||||
}
|
||||
iPC = oldPC;
|
||||
mVUcount = oldCount;
|
||||
mVUbranch = oldBranch;
|
||||
setCode();
|
||||
}
|
||||
|
||||
#define branchType1 if (mVUbranch <= 2) // B/BAL
|
||||
#define branchType2 else if (mVUbranch >= 9) // JR/JALR
|
||||
#define branchType3 else // Conditional Branch
|
||||
|
||||
// Accumulates into mVUflagInfo the flag-usage bits found by mVUpass4() on the
// code that can execute after the current branch. The branch kind is chosen by
// the branchType1/2/3 macros, which test mVUbranch:
//   - mVUbranch <= 2 (B/BAL):     scan the branch target only.
//   - mVUbranch >= 9 (JR/JALR):   set every bit of mVUflagInfo.
//   - otherwise (conditional):    scan the branch target and the not-taken path,
//                                 then OR both results together.
microVUt(void) mVUsetFlagInfo() {
|
||||
microVU* mVU = mVUx;
|
||||
branchType1 { incPC(-1); mVUpass4<vuIndex>(branchAddr); incPC(1); }
|
||||
branchType2 { mVUflagInfo |= 0xffffffff; }
|
||||
branchType3 {
|
||||
incPC(-1);
|
||||
mVUpass4<vuIndex>(branchAddr);
|
||||
int backupFlagInfo = mVUflagInfo; // Keep the result of the branch-taken scan
|
||||
mVUflagInfo = 0; // Start the not-taken scan from a clean slate
|
||||
incPC(4); // Branch Not Taken
|
||||
mVUpass4<vuIndex>(xPC);
|
||||
incPC(-3); // Net PC change in this branch is 0 (-1 + 4 - 3)
|
||||
//if (mVUflagInfo != backupFlagInfo) { mVUflagInfo |= __NeedExact; }
|
||||
mVUflagInfo |= backupFlagInfo; // Merge the taken and not-taken results
|
||||
}
|
||||
}
|
||||
|
||||
#endif //PCSX2_MICROVU
|
|
@ -440,6 +440,7 @@ microVUf(void) mVU_FCAND() {
|
|||
mVUallocVIb<vuIndex>(gprT1, 1);
|
||||
}
|
||||
pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FCEQ() {
|
||||
|
@ -453,6 +454,7 @@ microVUf(void) mVU_FCEQ() {
|
|||
mVUallocVIb<vuIndex>(gprT1, 1);
|
||||
}
|
||||
pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FCGET() {
|
||||
|
@ -464,6 +466,7 @@ microVUf(void) mVU_FCGET() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FCGET vi%02d", _Ft_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FCOR() {
|
||||
|
@ -477,6 +480,7 @@ microVUf(void) mVU_FCOR() {
|
|||
mVUallocVIb<vuIndex>(gprT1, 1);
|
||||
}
|
||||
pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FCSET() {
|
||||
|
@ -503,6 +507,7 @@ microVUf(void) mVU_FMAND() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FMEQ() {
|
||||
|
@ -517,6 +522,7 @@ microVUf(void) mVU_FMEQ() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FMOR() {
|
||||
|
@ -529,6 +535,7 @@ microVUf(void) mVU_FMOR() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -544,6 +551,7 @@ microVUf(void) mVU_FSAND() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FSEQ() {
|
||||
|
@ -557,6 +565,7 @@ microVUf(void) mVU_FSEQ() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FSOR() {
|
||||
|
@ -568,6 +577,7 @@ microVUf(void) mVU_FSOR() {
|
|||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); SysPrintf("b\n"); }
|
||||
}
|
||||
|
||||
microVUf(void) mVU_FSSET() {
|
||||
|
|
|
@ -146,6 +146,7 @@ declareAllVariables
|
|||
#define pass1 if (recPass == 0)
|
||||
#define pass2 if (recPass == 1)
|
||||
#define pass3 if (recPass == 2)
|
||||
#define pass4 if (recPass == 3)
|
||||
|
||||
// Misc Macros...
|
||||
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
|
||||
|
@ -159,6 +160,7 @@ declareAllVariables
|
|||
#define mVUregs mVUallocInfo.block.pState
|
||||
#define mVUregsTemp mVUallocInfo.regsTemp
|
||||
#define iPC mVUallocInfo.curPC
|
||||
#define mVUflagInfo mVUregs.needExactMatch //mVUallocInfo.flagInfo
|
||||
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
||||
#define mVUstall mVUallocInfo.stall[iPC / 2]
|
||||
#define mVUstartPC mVUallocInfo.startPC
|
||||
|
@ -249,6 +251,13 @@ declareAllVariables
|
|||
#define mVUlogQ() { mVUlog(", Q"); }
|
||||
#define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
|
||||
|
||||
// Flag Info
|
||||
//#define __NeedExact (1<<12)
|
||||
//#define __ExactMatch (mVUregs.needExactMatch & (1<<12))
|
||||
#define __Status (mVUflagInfo & (0xf<<0))
|
||||
#define __Mac (mVUflagInfo & (0xf<<4))
|
||||
#define __Clip (mVUflagInfo & (0xf<<8))
|
||||
|
||||
// Store VI regs in mmx regs?
|
||||
#define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8)
|
||||
#define mmVI(_VIreg_) (_VIreg_ - 1)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue