- Major flag algorithm rewrites (work in progress)
- Abused macros to create my tables, knocking off 800 lines of code.
- VU0 and VU1 now have different dynarec cache sizes (VU0's is 1/4 the size of VU1's)
- Minor changes...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1166 96395faa-99c1-11dd-bbfe-3dabce05a288
Author: cottonvibes
Date:   2009-05-11 01:39:33 +00:00
Commit: d19229d334 (parent 25090899ce)
10 changed files with 376 additions and 1083 deletions

@@ -43,6 +43,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
 	mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
 	mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
 	mVU->cache = NULL;
+	mVU->cacheSize = mVUcacheSize;
 	memset(&mVU->prog, 0, sizeof(mVU->prog));
 	mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
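The new per-VU cache sizing reduces to a simple ratio: VU1 keeps the full recompiler cache, VU0 gets a quarter of it. A minimal standalone sketch (simplified names, not the actual PCSX2 allocation code), using the 0x1f00000 base size from the mVUcacheSize macro added in the header diff below:

// Sketch only: how the per-VU dynarec cache size is chosen in this commit.
#include <cstdint>
#include <cstdio>

static const uint32_t kBaseCacheSize = 0x1f00000; // full-size cache (VU1)

uint32_t vuCacheSize(int vuIndex) {
	return kBaseCacheSize / (vuIndex ? 1 : 4); // VU0 gets 1/4 of VU1's cache
}

int main() {
	std::printf("VU0 cache: 0x%x bytes\n", vuCacheSize(0));
	std::printf("VU1 cache: 0x%x bytes\n", vuCacheSize(1));
	return 0;
}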

@@ -49,15 +49,15 @@ public:
 	}
 	microBlock* search(microRegInfo* pState) {
 		if (listSize < 0) return NULL;
-		if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
+		if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
 			for (int i = 0; i <= listSize; i++) {
-				if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo) - 1)) return &blockList[i];
+				if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo)/* - 4*/)) return &blockList[i];
 			}
 		}
 		else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
 			for (int i = 0; i <= listSize; i++) {
 				if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)
-				&& (blockList[i].pState.clip == pState->clip)) { return &blockList[i]; }
+				&& (blockList[i].pState.flags == pState->flags)) { return &blockList[i]; }
 			}
 		}
 		return NULL;
@@ -87,11 +87,12 @@ struct microProgManager {
 	microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
 };
+#define mVUcacheSize (0x1f00000 / ((vuIndex) ? 1 : 4))
 struct microVU {
 	u32 index; // VU Index (VU0 or VU1)
 	u32 microSize; // VU Micro Memory Size
 	u32 progSize; // VU Micro Program Size (microSize/4)
-	static const u32 cacheSize = 0xb00000; // VU Cache Size
+	u32 cacheSize; // VU Cache Size
 	microProgManager<0x4000> prog; // Micro Program Data
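The block search above is the core of block linking: a compiled block is reusable only if the pipeline state it was compiled against matches the incoming state, either exactly or on the fields that matter immediately. A small illustration of the two paths, using a simplified stand-in for microRegInfo (all-byte layout chosen so memcmp is padding-safe; not the real struct):

#include <cstring>

struct RegStateSketch {
	unsigned char q, p, flags;      // the "important pipeline stuff"
	unsigned char needExactMatch;   // nonzero -> full comparison of the state
	unsigned char rest[12];         // remainder of the pipeline state
};

bool statesMatch(const RegStateSketch& incoming, const RegStateSketch& candidate) {
	if (incoming.needExactMatch) // detailed search: exact match of pipeline state
		return std::memcmp(&incoming, &candidate, sizeof(RegStateSketch)) == 0;
	// simple search: only q, p, and the packed flag-instance byte matter
	return (incoming.q == candidate.q) && (incoming.p == candidate.p)
	    && (incoming.flags == candidate.flags);
}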

@@ -39,8 +39,8 @@ struct microRegInfo {
 	u8 p;
 	u8 r;
 	u8 xgkick;
-	u8 clip;
-	u8 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last byte in struct)
+	u8 flags; // clip x2 :: status x2
+	u32 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last 2 bytes in struct)
 #if defined(_MSC_VER)
 };
 #pragma pack(pop)
@@ -75,6 +75,7 @@ struct microAllocInfo {
 	u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
 	u32 curPC; // Current PC
 	u32 startPC; // Start PC for Cur Block
+	u32 flagInfo; // Holds information to help with flag instances on block linking
 	u32 info[pSize/8]; // Info for Instructions in current block
 	u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
 };
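The two new packed fields are used consistently through the rest of the commit: flags carries the current clip and status flag-instance indices (two bits each), and needExactMatch uses one nibble per flag type. A restatement of that convention as helpers (layout inferred from the masks used later in this commit; not actual PCSX2 code):

//   flags          bits 0-1 = status flag instance, bits 2-3 = clip flag instance
//   needExactMatch bits 0-3 = status, bits 4-7 = mac, bits 8-11 = clip
inline unsigned packFlagInstances(unsigned statusInst, unsigned clipInst) {
	return ((clipInst & 3) << 2) | (statusInst & 3);
}
inline bool needsExactStatus(unsigned needExactMatch) { return (needExactMatch & 0x00f) != 0; }
inline bool needsExactMac   (unsigned needExactMatch) { return (needExactMatch & 0x0f0) != 0; }
inline bool needsExactClip  (unsigned needExactMatch) { return (needExactMatch & 0xf00) != 0; }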

@@ -249,9 +249,9 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
 microVUt(void) mVUanalyzeSflag(int It) {
 	microVU* mVU = mVUx;
 	if (!It) { mVUinfo |= _isNOP; }
-	else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from
+	else {
 		mVUinfo |= _swapOps;
-		if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
+		if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf /*<< mVUcount*/; }
 		if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); }
 		// Note: _isSflag is used for status flag optimizations.
 		// Do to stalls, it can only be set one instruction prior to the status flag read instruction
@@ -279,7 +279,7 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) {
 	if (!It) { mVUinfo |= _isNOP; }
 	else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
 		mVUinfo |= _swapOps;
-		if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
+		if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); }
 		int curPC = iPC;
 		for (int i = mVUcount, j = 0; i > 1; i--, j++) {
 			incPC2(-2);
@@ -297,7 +297,8 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) {
 microVUt(void) mVUanalyzeCflag() {
 	microVU* mVU = mVUx;
-	if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; }
+	mVUinfo |= _swapOps;
+	if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 8); }
 }
 //------------------------------------------------------------------
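The pattern across all three analyzers is the same: a flag read within the first four instructions of a block forces an exact pipeline-state match for that flag type in the previous block, one nibble of needExactMatch per type. A compact restatement (hypothetical helper, not PCSX2 code):

enum FlagType { kStatusFlag = 0, kMacFlag = 1, kClipFlag = 2 };

inline void markNeedsExact(unsigned& needExactMatch, FlagType type, int instrIndex) {
	// 0xf << 0 for status, << 4 for mac, << 8 for clip, matching the analyzers above
	if (instrIndex < 4) needExactMatch |= 0xf << (4 * type);
}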

@@ -24,8 +24,7 @@
 //------------------------------------------------------------------
 #define branchCase(JMPcc, nJMPcc) \
-	mVUsetupBranch<vuIndex>(bStatus, bMac); \
-	mVUprint("mVUcompile branchCase"); \
+	mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles); \
 	CMP16ItoM((uptr)&mVU->branch, 0); \
 	incPC2(1); \
 	bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
@@ -46,12 +45,12 @@
 //------------------------------------------------------------------
 // Recompiles Code for Proper Flags and Q/P regs on Block Linkings
-microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
+microVUt(void) mVUsetupBranch(int* xStatus, int* xMac, int* xClip, int xCycles) {
 	microVU* mVU = mVUx;
 	mVUprint("mVUsetupBranch");
 	// Shuffle Flag Instances
-	mVUsetupFlags<vuIndex>(bStatus, bMac);
+	mVUsetupFlags<vuIndex>(xStatus, xMac, xClip, xCycles);
 	// Shuffle P/Q regs since every block starts at instance #0
 	if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); }
@@ -105,7 +104,7 @@ microVUt(void) mVUsetCycles() {
 	tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
 }
-microVUt(void) mVUendProgram(int fStatus, int fMac) {
+microVUt(void) mVUendProgram(int fStatus, int fMac, int fClip) {
 	microVU* mVU = mVUx;
 	incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
 	mVUcycles -= 100;
@@ -121,8 +120,10 @@ microVUt(void) mVUendProgram(int fStatus, int fMac) {
 	// Save Flag Instances
 	getFlagReg(fStatus, fStatus);
 	mVUallocMFLAGa<vuIndex>(gprT1, fMac);
+	mVUallocCFLAGa<vuIndex>(gprT2, fClip);
 	MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
 	MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
+	MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
 	//memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
 	//MOV32ItoM((uptr)&mVU->prog.lpState, (int)&mVUblock.pState); // Save pipeline state (clipflag instance)
@@ -133,12 +134,15 @@ microVUt(void) mVUendProgram(int fStatus, int fMac) {
 	JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
 }
+#define sI ((mVUpBlock->pState.needExactMatch & 0x000f) ? 0 : ((mVUpBlock->pState.flags >> 0) & 3))
+#define cI ((mVUpBlock->pState.needExactMatch & 0x0f00) ? 0 : ((mVUpBlock->pState.flags >> 2) & 3))
 microVUt(void) mVUtestCycles() {
 	microVU* mVU = mVUx;
 	iPC = mVUstartPC;
 	CMP32ItoM((uptr)&mVU->cycles, 0);
 	u8* jmp8 = JG8(0);
-	mVUendProgram<vuIndex>(0, 0);
+	mVUendProgram<vuIndex>(sI, 0, cI);
 	x86SetJ8(jmp8);
 	SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
 }
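The sI/cI macros pick which saved status/clip flag instance the out-of-cycles exit path should flush: instance 0 when the block requires an exact pipeline match, otherwise whatever instance the previous block recorded in the packed flags field. The same logic as plain functions (illustrative only):

inline int startStatusInstance(unsigned needExactMatch, unsigned flags) {
	return (needExactMatch & 0x000f) ? 0 : ((flags >> 0) & 3); // status instance in bits 0-1
}
inline int startClipInstance(unsigned needExactMatch, unsigned flags) {
	return (needExactMatch & 0x0f00) ? 0 : ((flags >> 2) & 3); // clip instance in bits 2-3
}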
@@ -171,8 +175,10 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 	mVUblock.x86ptrStart = thisPtr;
 	pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
 	mVUpBlock = pBlock;
-	for (int branch = 0;; ) {
+	mVUregs.flags = 0;
+	mVUflagInfo = 0;
+	for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) {
 		incPC(1);
 		mVUinfo = 0;
 		incCycles(1);
@@ -186,20 +192,21 @@
 		if (mVU->p) { mVUinfo |= _readP; }
 		if (mVU->q) { mVUinfo |= _readQ; }
 		else { mVUinfo |= _writeQ; }
-		if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; }
+		if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); mVUcount++; if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; }
 		else if (branch == 1) { branch = 2; }
-		if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
+		if (mVUbranch) { mVUsetFlagInfo<vuIndex>(); branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
 		incPC(1);
 		mVUcount++;
 	}
 	// Sets Up Flag instances
-	int bStatus[4]; int bMac[4];
-	mVUsetFlags<vuIndex>(bStatus, bMac);
+	int xStatus[4], xMac[4], xClip[4];
+	int xCycles = mVUsetFlags<vuIndex>(xStatus, xMac, xClip);
 	mVUtestCycles<vuIndex>();
 	//SysPrintf("bS[0] = %08x, bS[1] = %08x, bS[2] = %08x, bS[3] = %08x\n", bStatus[0], bStatus[1], bStatus[2], bStatus[3]);
 	//SysPrintf("bM[0] = %08x, bM[1] = %08x, bM[2] = %08x, bM[3] = %08x\n", bMac[0], bMac[1], bMac[2], bMac[3]);
 	//SysPrintf("mVUcount = %d\n", mVUcount);
+	//SysPrintf("mVUflagInfo = %d\n", mVUflagInfo);
 	// Second Pass
 	iPC = mVUstartPC;
@@ -227,7 +234,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 			mVUprint("mVUcompile B/BAL");
 			incPC(-3); // Go back to branch opcode (to get branch imm addr)
-			mVUsetupBranch<vuIndex>(bStatus, bMac);
+			mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles);
 			// Check if branch-block has already been compiled
 			pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
@@ -239,7 +246,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 			mVUprint("mVUcompile JR/JALR");
 			memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
-			mVUsetupBranch<vuIndex>(bStatus, bMac);
+			mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles);
 			mVUbackupRegs<vuIndex>();
 			MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
@@ -264,7 +271,8 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 			}
 			else {
 				uptr jumpAddr;
-				u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs, so back them up
+				u32 bPC = iPC; // mVUcompile can modify iPC, mVUregs, and mVUflagInfo, so back them up
+				u32 bFlagInfo = mVUflagInfo;
 				memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
 				incPC2(1); // Get PC for branch not-taken
@@ -272,6 +280,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 				else mVUcompileVU1(xPC, (uptr)&mVUregs);
 				iPC = bPC;
+				mVUflagInfo = bFlagInfo;
 				incPC(-3); // Go back to branch opcode (to get branch imm addr)
 				if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&pBlock->pStateEnd);
 				else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&pBlock->pStateEnd);
@@ -284,7 +293,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
 		if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); }
 	// Do E-bit end stuff here
-	mVUendProgram<vuIndex>(bStatus[3], bMac[3]);
+	mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff));
 	return thisPtr; //ToDo: Save pipeline state?
 }

@@ -57,6 +57,10 @@ microVUt(void) mVUdispatcherA() {
 	SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
 	SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, xmmT1);
+	SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL);
+	SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
+	SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, xmmT1);
 	SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
 	SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
 	SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
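The three added emitter calls mirror what the dispatcher already does for the mac flag: load the clip flag, broadcast lane 0 to all four lanes, and write it back so every clip-flag instance starts out identical. An illustrative intrinsics equivalent (not the emitter code itself):

#include <xmmintrin.h>

void broadcastClipFlag(float* clipFlagQuad) { // assumed 16-byte aligned
	__m128 v = _mm_load_ps(clipFlagQuad);
	v = _mm_shuffle_ps(v, v, 0); // shuffle immediate 0 -> replicate element 0
	_mm_store_ps(clipFlagQuad, v);
}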

@@ -19,20 +19,6 @@
 #pragma once
 #ifdef PCSX2_MICROVU
-// ToDo: Fix this properly.
-#define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \
-	yF += (mVUstall > 3) ? 3 : mVUstall; \
-	if (yF > zF) { \
-		pFlag += (yF-zF); \
-		if (pFlag >= xFlag) pFlag = (xFlag-1); \
-		zF++; \
-		xF = (yF-zF); \
-		zF = yF; \
-		yF -= xF; \
-	} \
-	yF++; \
-}
 // Sets FDIV Flags at the proper time
 microVUt(void) mVUdivSet() {
 	microVU* mVU = mVUx;
@@ -69,90 +55,177 @@ microVUt(void) mVUstatusFlagOp() {
 	iPC = curPC;
 }
+int findFlagInst(int* fFlag, int cycles) {
+	int j = 0, jValue = -1;
+	for (int i = 0; i < 4; i++) {
+		if ((fFlag[i] <= cycles) && (fFlag[i] > jValue)) { j = i; jValue = fFlag[i]; }
+	}
+	return j;
+}
+// Setup Last 4 instances of Status/Mac/Clip flags (needed for accurate block linking)
+void sortFlag(int* fFlag, int* bFlag, int cycles) {
+	for (int i = 0; i < 4; i++) {
+		bFlag[i] = findFlagInst(fFlag, cycles);
+		cycles++;
+	}
+}
 // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
-microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
+microVUt(int) mVUsetFlags(int* xStatus, int* xMac, int* xClip) {
 	microVU* mVU = mVUx;
-	// Ensure last ~4+ instructions update mac flags
 	int endPC = iPC;
-	u32 aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances
-	for (int i = mVUcount, iX = 0; i > 0; i--, aCount++) {
-		if (doStatus) { mVUinfo |= _doMac; iX++; if ((iX >= 4) || (aCount > 4)) { break; } }
+	u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking
+	// Ensure last ~4+ instructions update mac flags
+	for (int i = mVUcount; i > 0; i--, aCount++) {
+		if (doStatus) { if (__Mac) { mVUinfo |= _doMac; } if (aCount > 4) { break; } }
 		incPC2(-2);
 	}
 	// Status/Mac Flags Setup Code
-	int xStatus = 8, xMac = 8; // Flag Instances start at #0 on every block ((8&3) == 0)
-	int pStatus = 3, pMac = 3;
-	int xClip = mVUregs.clip + 8, pClip = mVUregs.clip + 7; // Clip Instance starts from where it left off
-	int xS = 0, yS = 1, zS = 0;
-	int xM = 0, yM = 1, zM = 0;
-	int xC = 0, yC = 1, zC = 0;
+	int xS = 0, xM = 0, xC = 0;
+	for (int i = 0; i < 4; i++) {
+		xStatus[i] = i;
+		xMac [i] = i;
+		xClip [i] = i;
+	}
+	if (!(mVUpBlock->pState.needExactMatch & 0x00f)) {
+		xS = (mVUpBlock->pState.flags >> 0) & 3;
+		xStatus[0] = -1; xStatus[1] = -1;
+		xStatus[2] = -1; xStatus[3] = -1;
+		xStatus[(xS-1)&3] = 0;
+	}
+	if (!(mVUpBlock->pState.needExactMatch & 0xf00)) {
+		xC = (mVUpBlock->pState.flags >> 2) & 3;
+		xClip[0] = -1; xClip[1] = -1;
+		xClip[2] = -1; xClip[3] = -1;
+		xClip[(xC-1)&3] = 0;
+	}
+	if (!(mVUpBlock->pState.needExactMatch & 0x0f0)) {
+		xMac[0] = -1; xMac[1] = -1;
+		xMac[2] = -1; xMac[3] = -1;
+	}
+	int cycles = 0;
 	u32 xCount = mVUcount; // Backup count
 	iPC = mVUstartPC;
 	for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
-		if (((xCount - mVUcount) > aCount) && isFSSET) mVUstatusFlagOp<vuIndex>(); // Don't Optimize out on the last ~4+ instructions
-		flagSetMacro(xStatus, pStatus, xS, yS, zS); // Handles _fvsinstances
-		flagSetMacro(xMac, pMac, xM, yM, zM); // Handles _fvminstances
-		flagSetMacro(xClip, pClip, xC, yC, zC); // Handles _fvcinstances
-		mVUinfo |= (xStatus&3) << 12; // _fsInstance
-		mVUinfo |= (xMac&3) << 10; // _fmInstance
-		mVUinfo |= (xClip&3) << 14; // _fcInstance
-		mVUinfo |= (pStatus&3) << 18; // _fvsInstance
-		mVUinfo |= (pMac&3) << 16; // _fvmInstance
-		mVUinfo |= (pClip&3) << 20; // _fvcInstance
-		if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); }
-		if (doMac) { xMac = (xMac+1); }
-		if (doClip) { xClip = (xClip+1); }
+		if (isFSSET) {
+			if (__Status) { // Don't Optimize out on the last ~4+ instructions
+				if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp<vuIndex>(); }
+			}
+			else mVUstatusFlagOp<vuIndex>();
+		}
+		cycles += mVUstall;
+		mVUinfo |= findFlagInst(xStatus, cycles) << 18; // _fvsInstance
+		mVUinfo |= findFlagInst(xMac, cycles) << 16; // _fvmInstance
+		mVUinfo |= findFlagInst(xClip, cycles) << 20; // _fvcInstance
+		mVUinfo |= (xS & 3) << 12; // _fsInstance
+		mVUinfo |= (xM & 3) << 10; // _fmInstance
+		mVUinfo |= (xC & 3) << 14; // _fcInstance
+		if (doStatus||isFSSET||doDivFlag) { xStatus [xS] = cycles + 4; xS = (xS+1)&3; }
+		if (doMac) { xMac [xM] = cycles + 4; xM = (xM+1)&3; }
+		if (doClip) { xClip [xC] = cycles + 4; xC = (xC+1)&3; }
+		cycles++;
 		incPC2(2);
 	}
-	mVUcount = xCount; // Restore count
-	mVUregs.clip = xClip&3; // Note: Clip timing isn't cycle-accurate between block linking; but hopefully doesn't matter
-	// Temp Hack-fix until flag-algorithm rewrite
-	for (int i = 0; i < 4; i++) {
-		bStatus[i] = 0;
-		bMac[i] = 0;
-	}
-	// Setup Last 4 instances of Status/Mac flags (needed for accurate block linking)
-	iPC = endPC;
-	for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) {
-		if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) {
-			for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; }
-		}
-		if (doMac && (j >= 0)) {
-			for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; }
-		}
-		incPC2(-2);
-	}
-	iPC = endPC;
+	mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
+	return cycles;
 }
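For readers following the rewrite, the two new helpers carry the whole scheme: findFlagInst returns the flag instance whose write has already completed (write time <= cycles) and is the most recent such write, and sortFlag lists which instance to read on each of the next four cycles, which is what block linking needs. Below is a standalone copy of the helpers with a tiny driver; the xStatus values are made-up example write times, not real data.

#include <cstdio>

int findFlagInst(int* fFlag, int cycles) {
	int j = 0, jValue = -1;
	for (int i = 0; i < 4; i++) {
		if ((fFlag[i] <= cycles) && (fFlag[i] > jValue)) { j = i; jValue = fFlag[i]; }
	}
	return j;
}

void sortFlag(int* fFlag, int* bFlag, int cycles) {
	for (int i = 0; i < 4; i++) {
		bFlag[i] = findFlagInst(fFlag, cycles);
		cycles++;
	}
}

int main() {
	int xStatus[4] = { 5, 9, 13, 1 }; // write-completion cycle of each status instance
	int bStatus[4];
	sortFlag(xStatus, bStatus, 10);   // block ends at cycle 10
	for (int i = 0; i < 4; i++)
		std::printf("cycle %d reads status instance %d\n", 10 + i, bStatus[i]);
	return 0; // prints instances 1, 1, 1, 2 for this example
}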
 #define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
-#define shuffleMac ((bMac[3]<<6)|(bMac[2]<<4)|(bMac[1]<<2)|bMac[0])
+#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0])
+#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0])
 // Recompiles Code for Proper Flags on Block Linkings
-microVUt(void) mVUsetupFlags(int* bStatus, int* bMac) {
+microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
 	microVU* mVU = mVUx;
-	PUSH32R(gprR); // Backup gprR
-	MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
-	MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
-	MOV32RtoR(gprR, getFlagReg1(bStatus[2]));
-	MOV32RtoR(gprF3, getFlagReg1(bStatus[3]));
-	MOV32RtoR(gprF0, gprT1);
-	MOV32RtoR(gprF1, gprT2);
-	MOV32RtoR(gprF2, gprR);
-	POP32R(gprR); // Restore gprR
-	SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->macFlag);
-	SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleMac);
-	SSE_MOVAPS_XMM_to_M128((uptr)mVU->macFlag, xmmT1);
+	if (__Status) {
+		int bStatus[4];
+		sortFlag(xStatus, bStatus, cycles);
+		PUSH32R(gprR); // Backup gprR
+		MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
+		MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
+		MOV32RtoR(gprR, getFlagReg1(bStatus[2]));
+		MOV32RtoR(gprF3, getFlagReg1(bStatus[3]));
+		MOV32RtoR(gprF0, gprT1);
+		MOV32RtoR(gprF1, gprT2);
+		MOV32RtoR(gprF2, gprR);
+		POP32R(gprR); // Restore gprR
+	}
+	if (__Mac) {
+		int bMac[4];
+		sortFlag(xMac, bMac, cycles);
+		SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->macFlag);
+		SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleMac);
+		SSE_MOVAPS_XMM_to_M128((uptr)mVU->macFlag, xmmT1);
+	}
+	if (__Clip) {
+		int bClip[4];
+		sortFlag(xClip, bClip, cycles);
+		SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->clipFlag);
+		SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleClip);
+		SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
+	}
 }
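The shuffleMac/shuffleClip immediates are simply the four chosen 2-bit instance indices packed into the standard SSE shuffle-control byte. A sketch (not PCSX2 code):

inline unsigned char packShuffle(const int b[4]) {
	return (unsigned char)((b[3] << 6) | (b[2] << 4) | (b[1] << 2) | b[0]);
}
// Example: b = {2, 3, 0, 1} gives 0x4e, the classic "swap 64-bit halves" shuffle.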
-#endif //PCSX2_MICROVU
+microVUt(void) mVUpass4(int startPC) {
+	microVU* mVU = mVUx;
+	int oldPC = iPC;
+	int oldCount = mVUcount;
+	int oldBranch = mVUbranch;
+	iPC = startPC / 4;
+	mVUcount = 0;
+	mVUbranch = 0;
+	for (int branch = 0; mVUcount < 4; mVUcount++) {
+		incPC(1);
+		if ( curI & _Ebit_ ) { branch = 1; }
+		if ( curI & _MDTbit_ ) { branch = 2; }
+		if (!(curI & _Ibit_) ) { incPC(-1); mVUopL<vuIndex, 3>(); incPC(1); }
+		if (branch >= 2) { break; }
+		else if (branch == 1) { branch = 2; }
+		if (mVUbranch) { branch = 3; mVUbranch = 0; }
+		incPC(1);
+	}
+	iPC = oldPC;
+	mVUcount = oldCount;
+	mVUbranch = oldBranch;
+	setCode();
+}
+#define branchType1 if (mVUbranch <= 2) // B/BAL
+#define branchType2 else if (mVUbranch >= 9) // JR/JALR
+#define branchType3 else // Conditional Branch
+microVUt(void) mVUsetFlagInfo() {
+	microVU* mVU = mVUx;
+	branchType1 { incPC(-1); mVUpass4<vuIndex>(branchAddr); incPC(1); }
+	branchType2 { mVUflagInfo |= 0xffffffff; }
+	branchType3 {
+		incPC(-1);
+		mVUpass4<vuIndex>(branchAddr);
+		int backupFlagInfo = mVUflagInfo;
+		mVUflagInfo = 0;
+		incPC(4); // Branch Not Taken
+		mVUpass4<vuIndex>(xPC);
+		incPC(-3);
+		//if (mVUflagInfo != backupFlagInfo) { mVUflagInfo |= __NeedExact; }
+		mVUflagInfo |= backupFlagInfo;
+	}
+}
+#endif //PCSX2_MICROVU
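Conceptually, mVUpass4 peeks at up to four instructions of a branch target and runs the opcode tables in "pass 4" so the pass4 handlers can record which flag types that block reads; mVUsetFlagInfo applies this to the taken path for direct branches, to both paths for conditional branches, and assumes everything is read for JR/JALR since the target is unknown. A rough standalone model of that decision (illustrative only, with hypothetical names; not the real recompiler code):

#include <cstdint>

enum class BranchKind { Direct, Indirect, Conditional }; // B/BAL, JR/JALR, conditional

// OR together the flag-read info of up to the first 4 instructions of a block.
uint32_t lookaheadFlags(const uint32_t* blockInfo, int count) {
	uint32_t info = 0;
	for (int i = 0; i < count && i < 4; i++) info |= blockInfo[i];
	return info;
}

uint32_t flagInfoForBranch(BranchKind kind,
                           const uint32_t* taken, int nTaken,
                           const uint32_t* notTaken, int nNotTaken) {
	if (kind == BranchKind::Indirect) return 0xffffffff;    // target unknown: assume all flags are read
	uint32_t info = lookaheadFlags(taken, nTaken);          // direct branch: peek at the target block
	if (kind == BranchKind::Conditional)                    // conditional: both paths matter
		info |= lookaheadFlags(notTaken, nNotTaken);
	return info;
}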

@@ -440,6 +440,7 @@ microVUf(void) mVU_FCAND() {
 		mVUallocVIb<vuIndex>(gprT1, 1);
 	}
 	pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
 }
 microVUf(void) mVU_FCEQ() {
@@ -453,6 +454,7 @@ microVUf(void) mVU_FCEQ() {
 		mVUallocVIb<vuIndex>(gprT1, 1);
 	}
 	pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
 }
 microVUf(void) mVU_FCGET() {
@@ -463,7 +465,8 @@ microVUf(void) mVU_FCGET() {
 		AND32ItoR(gprT1, 0xfff);
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FCGET vi%02d", _Ft_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
 }
 microVUf(void) mVU_FCOR() {
@@ -477,6 +480,7 @@ microVUf(void) mVU_FCOR() {
 		mVUallocVIb<vuIndex>(gprT1, 1);
 	}
 	pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 8); }
 }
 microVUf(void) mVU_FCSET() {
@@ -503,6 +507,7 @@ microVUf(void) mVU_FMAND() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
 }
 microVUf(void) mVU_FMEQ() {
@@ -517,6 +522,7 @@ microVUf(void) mVU_FMEQ() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
 }
 microVUf(void) mVU_FMOR() {
@@ -529,6 +535,7 @@ microVUf(void) mVU_FMOR() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 4); }
 }
 //------------------------------------------------------------------
@@ -544,6 +551,7 @@ microVUf(void) mVU_FSAND() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
 }
 microVUf(void) mVU_FSEQ() {
@@ -557,6 +565,7 @@ microVUf(void) mVU_FSEQ() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
 }
 microVUf(void) mVU_FSOR() {
@@ -568,6 +577,7 @@ microVUf(void) mVU_FSOR() {
 		mVUallocVIb<vuIndex>(gprT1, _It_);
 	}
 	pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
+	pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); SysPrintf("b\n"); }
 }
 microVUf(void) mVU_FSSET() {

@@ -146,6 +146,7 @@ declareAllVariables
 #define pass1 if (recPass == 0)
 #define pass2 if (recPass == 1)
 #define pass3 if (recPass == 2)
+#define pass4 if (recPass == 3)
 // Misc Macros...
 #define mVUcurProg mVU->prog.prog[mVU->prog.cur]
@@ -159,6 +160,7 @@ declareAllVariables
 #define mVUregs mVUallocInfo.block.pState
 #define mVUregsTemp mVUallocInfo.regsTemp
 #define iPC mVUallocInfo.curPC
+#define mVUflagInfo mVUregs.needExactMatch //mVUallocInfo.flagInfo
 #define mVUinfo mVUallocInfo.info[iPC / 2]
 #define mVUstall mVUallocInfo.stall[iPC / 2]
 #define mVUstartPC mVUallocInfo.startPC
@@ -249,6 +251,13 @@ declareAllVariables
 #define mVUlogQ() { mVUlog(", Q"); }
 #define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
+// Flag Info
+//#define __NeedExact (1<<12)
+//#define __ExactMatch (mVUregs.needExactMatch & (1<<12))
+#define __Status (mVUflagInfo & (0xf<<0))
+#define __Mac (mVUflagInfo & (0xf<<4))
+#define __Clip (mVUflagInfo & (0xf<<8))
 // Store VI regs in mmx regs?
 #define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8)
 #define mmVI(_VIreg_) (_VIreg_ - 1)
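Taken together with the pass4 handlers in the lower-opcode file, these macros give the flag-dependency bookkeeping a simple shape: each flag-reading opcode ORs a nibble into mVUflagInfo, and __Status/__Mac/__Clip test those nibbles when deciding what a block has to keep exact. A rough standalone model (illustrative only, not the real macros):

enum : unsigned {
	kReadsStatus = 0xf << 0, // FSAND / FSEQ / FSOR
	kReadsMac    = 0xf << 4, // FMAND / FMEQ / FMOR
	kReadsClip   = 0xf << 8, // FCAND / FCEQ / FCOR / FCGET
};

struct FlagInfoSketch {
	unsigned bits = 0;
	void markRead(unsigned mask) { bits |= mask; } // what a pass4 {} block does
	bool readsStatus() const { return (bits & kReadsStatus) != 0; }
	bool readsMac()    const { return (bits & kReadsMac)    != 0; }
	bool readsClip()   const { return (bits & kReadsClip)   != 0; }
};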

File diff suppressed because it is too large.