From 45e9f544cd06d063663232efa4bb29d542e84131 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 18 Apr 2009 04:28:24 +0000 Subject: [PATCH] microVU: mostly block-linking/execution stuff... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1003 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 10 +++--- pcsx2/x86/microVU.h | 66 +++++++++++------------------------ pcsx2/x86/microVU_Alloc.h | 10 +++++- pcsx2/x86/microVU_Analyze.inl | 16 ++++----- pcsx2/x86/microVU_Compile.inl | 63 +++++++++++++++++++++------------ pcsx2/x86/microVU_Execute.inl | 37 +++++++------------- pcsx2/x86/microVU_Misc.h | 12 ++++--- pcsx2/x86/microVU_Tables.inl | 2 -- 8 files changed, 105 insertions(+), 111 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index d54d8367ab..af8af9002b 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -63,9 +63,9 @@ microVUt(void) mVUreset() { // Dynarec Cache mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - mVU->ptr = mVU->cache; - + // Setup Entrance/Exit Points + x86SetPtr(mVU->cache); mVUdispatcherA(); mVUdispatcherB(); @@ -105,6 +105,7 @@ microVUt(void) mVUclose() { microVUt(void) mVUclear(u32 addr, u32 size) { microVU* mVU = mVUx; + memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState)); mVU->prog.cleared = 1; // Next execution searches/creates a new microprogram // Note: It might be better to copy old recompiled blocks to the new microprogram rec data // however, if games primarily do big writes, its probably not worth it. @@ -157,6 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. (ToDo: Verify that games don't clear, and send the same microprogram :/) + //if (mVU->prog.prog[i]) // ToDo: Implement Cycles if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; @@ -172,7 +174,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { mVU->prog.prog[mVU->prog.cur].used++; return 1; // If !cleared, then we're still on the same program as last-time ;) } - +/* // Block Invalidation __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { @@ -192,7 +194,7 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { mVU->prog.prog[mVU->prog.cur].block[i]->clearFast(); } } - +*/ //------------------------------------------------------------------ // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 90096299de..ad54f1eeb1 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -25,67 +25,40 @@ #include "microVU_Alloc.h" #include "microVU_Misc.h" -struct microBlock { - microRegInfo pState; // Detailed State of Pipeline - u32 pipelineState; // | FDiv x 4 | EFU x 6 | Needs pState Info? x 1 | // Simple State of Pipeline - u8* x86ptrStart; // Start of code - u8* x86ptrEnd; // End of code (first byte outside of block) - u8* x86ptrBranch; // - u32 size; // Number of 64bit VU Instructions in Block -}; - #define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...) class microBlockManager { private: static const int MaxBlocks = mMaxBlocks - 1; - u32 startPC; - u32 endPC; int listSize; // Total Items - 1 - int callerSize; // Total Callers - 1 microBlock blockList[mMaxBlocks]; - microBlock callersList[mMaxBlocks]; // Foreign Blocks that call Local Blocks public: - microBlockManager() { init(); } - ~microBlockManager() { close(); } - void init() { - listSize = -1; - callerSize = -1; - //ZeroMemory(&blockList, sizeof(blockList)); // Can be Omitted? - //ZeroMemory(&blockList, sizeof(callersList)); // Can be Omitted? - } - void reset() { init(); }; - void close() {}; // Can be Omitted? - /*void add(u32 pipelineState, u8* x86ptrStart) { - if (!search(pipelineState)) { + microBlockManager() { reset(); } + ~microBlockManager() {} + void reset() { listSize = -1; }; + microBlock* add(microBlock* pBlock) { + microBlock* thisBlock = search(&pBlock->pState); + if (!thisBlock) { listSize++; listSize &= MaxBlocks; - blockList[listSize].pipelineState = pipelineState; - blockList[listSize].x86ptrStart = x86ptrStart; + memcpy_fast(&blockList[listSize], pBlock, sizeof(microBlock)); + thisBlock = &blockList[listSize]; } - }*/ - microBlock* search(/*u32 pipelineState,*/ microRegInfo* pState) { - /*if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State) - for (int i = 0; i < listSize; i++) { + return thisBlock; + } + microBlock* search(microRegInfo* pState) { + if (listSize < 0) return NULL; + if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) + for (int i = 0; i <= listSize; i++) { if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i]; } } else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) - for (int i = 0; i < listSize; i++) { - if (blockList[i].pipelineState == pipelineState) return &blockList[i]; + for (int i = 0; i <= listSize; i++) { + if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)) { return &blockList[i]; } } - }*/ - return NULL; - } - void clearFast() { - listSize = -1; - for ( ; callerSize >= 0; callerSize--) { - //callerList[callerSize]. // ToDo: Implement Branch Link Removal Code } - } - int clear() { - if (listSize >= 0) { clearFast(); return 1; } - else return 0; + return NULL; } }; @@ -109,7 +82,7 @@ struct microProgManager { int total; // Total Number of valid MicroPrograms minus 1 int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one) int finished; // Completed MicroProgram by E-bit Termination - u32 lastPipelineState; // Pipeline state from where it left off (useful for continuing execution) + microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution) }; struct microVU { @@ -124,7 +97,6 @@ struct microVU { u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) u8* startFunct; // Ptr Function to the Start code for recompiled programs u8* exitFunct; // Ptr Function to the Exit code for recompiled programs - u8* ptr; // Pointer to next place to write recompiled code to u32 code; // Contains the current Instruction u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) @@ -167,6 +139,8 @@ typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if #include "microVU_Misc.inl" #include "microVU_Analyze.inl" #include "microVU_Alloc.inl" +#include "microVU_Upper.inl" +#include "microVU_Lower.inl" #include "microVU_Tables.inl" #include "microVU_Compile.inl" #include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 8a827eea3f..49b0fda39e 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -18,6 +18,7 @@ #pragma once + union regInfo { u32 reg; struct { @@ -49,9 +50,16 @@ struct microTempRegInfo { u8 xgkick; // Holds the cycle info for XGkick }; +struct microBlock { + microRegInfo pState; // Detailed State of Pipeline + u8* x86ptrStart; // Start of code + //u8* x86ptrEnd; // End of code (first byte outside of block) + //u32 size; // Number of 64bit VU Instructions in Block +}; + template struct microAllocInfo { - microRegInfo regs; // Pipeline info + microBlock block; // Block/Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR u32 cycles; // Cycles for current block diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 94f75d0f2f..9381c317bf 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -247,7 +247,8 @@ microVUt(void) mVUanalyzeSflag(int It) { if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; - if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } + if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } // Note: _isSflag is used for status flag optimizations. // Do to stalls, it can only be set one instruction prior to the status flag read instruction // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. @@ -274,14 +275,13 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { if (!It) { mVUinfo |= _isNOP; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo |= _swapOps; - if (mVUcount > 1) { - int curPC = iPC; - for (int i = mVUcount, j = 0; i > 1; i--, j++) { - incPC(-2); - if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } - } - iPC = curPC; + if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + int curPC = iPC; + for (int i = mVUcount, j = 0; i > 1; i--, j++) { + incPC(-2); + if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } } + iPC = curPC; } analyzeVIreg1(Is); analyzeVIreg2(It, 1); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 775a2aec5d..1e21e6ab3f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -143,8 +143,9 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { #define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0))) #define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? gprT2 : gprT1))) -// Recompiles Code for Proper Flags on Block Linkings -microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { +// Recompiles Code for Proper Flags and Q/P regs on Block Linkings +microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { + microVU* mVU = mVUx; PUSH32R(gprR); // Backup gprR PUSH32R(gprESP); // Backup gprESP @@ -176,6 +177,9 @@ microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { POP32R(gprESP); // Restore gprESP POP32R(gprR); // Restore gprR + + // Shuffle P/Q regs since every block starts at instance #0 + if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); } } microVUt(void) mVUincCycles(int x) { @@ -237,14 +241,14 @@ microVUt(void) mVUdivSet() { microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microVU* mVU = mVUx; - u8* thisPtr = mVUcurProg.x86ptr; - iPC = startPC / 4; + u8* thisPtr = x86Ptr; // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC/2]->search((microRegInfo*)pState); - if (pblock) { return pblock->x86ptrStart; } + microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); + if (pBlock) { return pBlock->x86ptrStart; } // First Pass + iPC = startPC / 4; setCode(); mVUbranch = 0; mVUstartPC = iPC; @@ -252,6 +256,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage mVU->p = 0; // All blocks start at p index #0 mVU->q = 0; // All blocks start at q index #0 + memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info for (int branch = 0;; ) { startLoop(); mVUopU(); @@ -286,7 +291,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (!isBdelay) { incPC(1); } else { - u32* ajmp; + u32* ajmp = 0; switch (mVUbranch) { case 3: branchCase(JZ32); // IBEQ case 4: branchCase(JGE32); // IBGEZ @@ -295,29 +300,43 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { case 7: branchCase(JL32); // IBLTZ case 8: branchCase(JNZ32); // IBNEQ case 1: case 2: // B/BAL - // ToDo: search for block - // (remember about global variables and recursion!) - mVUsetFlagsRec(bStatus, bMac); - ajmp = JMP32((uptr)0); - break; + incPC(-2); // Go back to branch opcode (to get branch imm addr) + mVUsetupBranch(bStatus, bMac); + + // Check if branch-block has already been compiled + pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { + ajmp = JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); + mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + } + else { + pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add block + if (!vuIndex) mVUcompileVU0(branchAddr, (uptr)&pBlock->pState); + else mVUcompileVU1(branchAddr, (uptr)&pBlock->pState); + } + //incPC(+2); + return thisPtr; case 9: case 10: // JR/JALR - mVUsetFlagsRec(bStatus, bMac); + mVUsetupBranch(bStatus, bMac); PUSH32R(gprR); // Backup EDX - MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - AND32ItoR(gprT2, (vuIndex) ? 0x3ff8 : 0xff8); - MOV32ItoR(gprR, (u32)&pblock->pState); // Get pState (EDX second argument for __fastcall) + //MOV32MtoR(gprT1, (uptr)&mVUcurProg.x86ptr); // Get last x86ptr for this program + //MOV32RtoM((uptr)&x86Ptr, gprT1); // Setup x86Ptr to write to correct address + MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) + AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address + pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + MOV32ItoR(gprR, (u32)&pBlock->pState); // Get pState (EDX second argument for __fastcall) - //ToDo: Add block to block manager and use its address instead of pblock! - - if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) + if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) else CALLFunc((uptr)mVUcompileVU1); - POP32R(gprR); // Restore + POP32R(gprR); // Restore EDX JMPR(gprT1); // Jump to rec-code address - break; + return thisPtr; } - //mVUcurProg.x86Ptr + + + return thisPtr; } } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index c467464241..205fdf0047 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -26,8 +26,7 @@ microVUt(void) mVUdispatcherA() { static u32 PCSX2_ALIGNED16(vuMXCSR); microVU* mVU = mVUx; - x86SetPtr(mVU->ptr); - mVU->startFunct = mVU->ptr; + mVU->startFunct = x86Ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } @@ -67,15 +66,13 @@ microVUt(void) mVUdispatcherA() { // Jump to Recompiled Code Block JMPR(EAX); - mVU->ptr = x86Ptr; } // Generates the code to exit from recompiled blocks microVUt(void) mVUdispatcherB() { static u32 PCSX2_ALIGNED16(eeMXCSR); microVU* mVU = mVUx; - x86SetPtr(mVU->ptr); - mVU->exitFunct = mVU->ptr; + mVU->exitFunct = x86Ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } @@ -111,8 +108,7 @@ microVUt(void) mVUdispatcherB() { EMMS(); RET(); - mVU->ptr = x86Ptr; - mVUcachCheck(mVU->cache, 512); + mVUcacheCheck(x86Ptr, mVU->cache, 512); } //------------------------------------------------------------------ @@ -121,23 +117,16 @@ microVUt(void) mVUdispatcherB() { // Executes for number of cycles microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { -/* - Pseudocode: (ToDo: implement # of cycles) - 1) Search for existing program - 2) If program not found, goto 5 - 3) Search for recompiled block - 4) If recompiled block found, goto 6 - 5) Recompile as much blocks as possible - 6) Return start execution address of block -*/ + microVU* mVU = mVUx; mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); - if ( mVUsearchProg(mVU) ) { // Found Program - //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); - //if (block) return block->x86ptrStart; // Found Block - } - // Recompile code - return NULL; + + // ToDo: Implement Cycles + mVUsearchProg(mVU); // Find and set correct program + + x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off + if (!vuIndex) return mVUcompileVU0(startPC, (uptr)&mVU->prog.lpState); + else return mVUcompileVU1(startPC, (uptr)&mVU->prog.lpState); } //------------------------------------------------------------------ @@ -146,8 +135,8 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - mVU->ptr = mVUcurProg.x86ptr; - mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); + mVUcurProg.x86ptr = x86Ptr; + mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 31e6cefab4..7937d2990b 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -139,12 +139,13 @@ declareAllVariables #define microVUq(aType) template __forceinline aType #define mVUcurProg mVU->prog.prog[mVU->prog.cur] -#define mVUblock mVU->prog.prog[mVU->prog.cur].block +#define mVUblocks mVU->prog.prog[mVU->prog.cur].block #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count -#define mVUregs mVUallocInfo.regs +#define mVUblock mVUallocInfo.block +#define mVUregs mVUallocInfo.block.pState #define mVUregsTemp mVUallocInfo.regsTemp #define iPC mVUallocInfo.curPC #define mVUinfo mVUallocInfo.info[iPC / 2] @@ -157,6 +158,9 @@ declareAllVariables #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incCycles(x) { mVUincCycles(x); } #define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) +#define branchAddr ((xPC + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) +#define shufflePQ (((mVU->q) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04)) + #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -235,7 +239,7 @@ declareAllVariables #define mVUdebug1() {} #endif -#define mVUcachCheck(start, limit) { \ - uptr diff = mVU->ptr - start; \ +#define mVUcacheCheck(ptr, start, limit) { \ + uptr diff = ptr - start; \ if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ } diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index 22af9f2aaa..d79cd3c74a 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -17,8 +17,6 @@ */ #pragma once -#include "microVU_Upper.inl" -#include "microVU_Lower.inl" #ifdef PCSX2_MICROVU //------------------------------------------------------------------