microVU: mostly block-linking/execution stuff...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1003 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-04-18 04:28:24 +00:00
parent 14f306a800
commit 45e9f544cd
8 changed files with 105 additions and 111 deletions

View File

@ -63,9 +63,9 @@ microVUt(void) mVUreset() {
// Dynarec Cache
mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
mVU->ptr = mVU->cache;
// Setup Entrance/Exit Points
x86SetPtr(mVU->cache);
mVUdispatcherA<vuIndex>();
mVUdispatcherB<vuIndex>();
@ -105,6 +105,7 @@ microVUt(void) mVUclose() {
microVUt(void) mVUclear(u32 addr, u32 size) {
microVU* mVU = mVUx;
memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState));
mVU->prog.cleared = 1; // Next execution searches/creates a new microprogram
// Note: It might be better to copy old recompiled blocks to the new microprogram rec data;
// however, if games primarily do big writes, it's probably not worth it.
@ -157,6 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) {
if (mVU->prog.cleared) { // If cleared, we need to search for new program
for (int i = 0; i <= mVU->prog.total; i++) {
//if (i == mVU->prog.cur) continue; // We can skip the current program. (ToDo: Verify that games don't clear, and send the same microprogram :/)
//if (mVU->prog.prog[i]) // ToDo: Implement Cycles
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); }
mVU->prog.cur = i;
@ -172,7 +174,7 @@ __forceinline int mVUsearchProg(microVU* mVU) {
mVU->prog.prog[mVU->prog.cur].used++;
return 1; // If !cleared, then we're still on the same program as last-time ;)
}
/*
// Block Invalidation
__forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) {
@ -192,7 +194,7 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) {
mVU->prog.prog[mVU->prog.cur].block[i]->clearFast();
}
}
*/
//------------------------------------------------------------------
// Wrapper Functions - Called by other parts of the Emu
//------------------------------------------------------------------

View File

@ -25,67 +25,40 @@
#include "microVU_Alloc.h"
#include "microVU_Misc.h"
// Record describing one recompiled block: the pipeline state it was built for
// plus pointers into the generated x86 code and the block's length.
struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
u32 pipelineState; // | FDiv x 4 | EFU x 6 | Needs pState Info? x 1 | // Simple State of Pipeline
u8* x86ptrStart; // Start of code
u8* x86ptrEnd; // End of code (first byte outside of block)
u8* x86ptrBranch; // NOTE(review): purpose not documented; presumably points at the block's branch code -- confirm
u32 size; // Number of 64bit VU Instructions in Block
};
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
class microBlockManager {
private:
static const int MaxBlocks = mMaxBlocks - 1;
u32 startPC;
u32 endPC;
int listSize; // Total Items - 1
int callerSize; // Total Callers - 1
microBlock blockList[mMaxBlocks];
microBlock callersList[mMaxBlocks]; // Foreign Blocks that call Local Blocks
public:
microBlockManager() { init(); }
~microBlockManager() { close(); }
void init() {
listSize = -1;
callerSize = -1;
//ZeroMemory(&blockList, sizeof(blockList)); // Can be Omitted?
//ZeroMemory(&blockList, sizeof(callersList)); // Can be Omitted?
}
void reset() { init(); };
void close() {}; // Can be Omitted?
/*void add(u32 pipelineState, u8* x86ptrStart) {
if (!search(pipelineState)) {
microBlockManager() { reset(); }
~microBlockManager() {}
void reset() { listSize = -1; };
microBlock* add(microBlock* pBlock) {
microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock) {
listSize++;
listSize &= MaxBlocks;
blockList[listSize].pipelineState = pipelineState;
blockList[listSize].x86ptrStart = x86ptrStart;
memcpy_fast(&blockList[listSize], pBlock, sizeof(microBlock));
thisBlock = &blockList[listSize];
}
}*/
microBlock* search(/*u32 pipelineState,*/ microRegInfo* pState) {
/*if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i < listSize; i++) {
return thisBlock;
}
microBlock* search(microRegInfo* pState) {
if (listSize < 0) return NULL;
if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i <= listSize; i++) {
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
}
}
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
for (int i = 0; i < listSize; i++) {
if (blockList[i].pipelineState == pipelineState) return &blockList[i];
for (int i = 0; i <= listSize; i++) {
if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)) { return &blockList[i]; }
}
}*/
return NULL;
}
void clearFast() {
listSize = -1;
for ( ; callerSize >= 0; callerSize--) {
//callerList[callerSize]. // ToDo: Implement Branch Link Removal Code
}
}
int clear() {
if (listSize >= 0) { clearFast(); return 1; }
else return 0;
return NULL;
}
};
@ -109,7 +82,7 @@ struct microProgManager {
int total; // Total Number of valid MicroPrograms minus 1
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
int finished; // Completed MicroProgram by E-bit Termination
u32 lastPipelineState; // Pipeline state from where it left off (useful for continuing execution)
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
};
struct microVU {
@ -124,7 +97,6 @@ struct microVU {
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
u8* startFunct; // Ptr Function to the Start code for recompiled programs
u8* exitFunct; // Ptr Function to the Exit code for recompiled programs
u8* ptr; // Pointer to next place to write recompiled code to
u32 code; // Contains the current Instruction
u32 iReg; // iReg (only used in recompilation, not execution)
u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
@ -167,6 +139,8 @@ typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if
#include "microVU_Misc.inl"
#include "microVU_Analyze.inl"
#include "microVU_Alloc.inl"
#include "microVU_Upper.inl"
#include "microVU_Lower.inl"
#include "microVU_Tables.inl"
#include "microVU_Compile.inl"
#include "microVU_Execute.inl"

View File

@ -18,6 +18,7 @@
#pragma once
union regInfo {
u32 reg;
struct {
@ -49,9 +50,16 @@ struct microTempRegInfo {
u8 xgkick; // Holds the cycle info for XGkick
};
// Record describing one recompiled block: the pipeline state it was compiled
// for and the address where its generated x86 code begins.
struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
u8* x86ptrStart; // Start of code
//u8* x86ptrEnd; // End of code (first byte outside of block)
//u32 size; // Number of 64bit VU Instructions in Block
};
template<u32 pSize>
struct microAllocInfo {
microRegInfo regs; // Pipeline info
microBlock block; // Block/Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR
u32 cycles; // Cycles for current block

View File

@ -247,7 +247,8 @@ microVUt(void) mVUanalyzeSflag(int It) {
if (!It) { mVUinfo |= _isNOP; }
else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from
mVUinfo |= _swapOps;
if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); }
if (mVUcount < 4) { mVUregs.needExactMatch = 1; }
if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); }
// Note: _isSflag is used for status flag optimizations.
// Due to stalls, it can only be set one instruction prior to the status flag read instruction;
// if we were guaranteed that no stalls would happen, it could be set 4 instructions prior.
@ -274,14 +275,13 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) {
if (!It) { mVUinfo |= _isNOP; }
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
mVUinfo |= _swapOps;
if (mVUcount > 1) {
int curPC = iPC;
for (int i = mVUcount, j = 0; i > 1; i--, j++) {
incPC(-2);
if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } }
}
iPC = curPC;
if (mVUcount < 4) { mVUregs.needExactMatch = 1; }
int curPC = iPC;
for (int i = mVUcount, j = 0; i > 1; i--, j++) {
incPC(-2);
if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } }
}
iPC = curPC;
}
analyzeVIreg1(Is);
analyzeVIreg2(It, 1);

View File

@ -143,8 +143,9 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
#define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? gprT2 : gprT1)))
// Recompiles Code for Proper Flags on Block Linkings
microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) {
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
microVU* mVU = mVUx;
PUSH32R(gprR); // Backup gprR
PUSH32R(gprESP); // Backup gprESP
@ -176,6 +177,9 @@ microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) {
POP32R(gprESP); // Restore gprESP
POP32R(gprR); // Restore gprR
// Shuffle P/Q regs since every block starts at instance #0
if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); }
}
microVUt(void) mVUincCycles(int x) {
@ -237,14 +241,14 @@ microVUt(void) mVUdivSet() {
microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
microVU* mVU = mVUx;
u8* thisPtr = mVUcurProg.x86ptr;
iPC = startPC / 4;
u8* thisPtr = x86Ptr;
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pblock = mVUblock[iPC/2]->search((microRegInfo*)pState);
if (pblock) { return pblock->x86ptrStart; }
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
if (pBlock) { return pBlock->x86ptrStart; }
// First Pass
iPC = startPC / 4;
setCode();
mVUbranch = 0;
mVUstartPC = iPC;
@ -252,6 +256,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage
mVU->p = 0; // All blocks start at p index #0
mVU->q = 0; // All blocks start at q index #0
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
for (int branch = 0;; ) {
startLoop();
mVUopU<vuIndex, 0>();
@ -286,7 +291,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
if (!isBdelay) { incPC(1); }
else {
u32* ajmp;
u32* ajmp = 0;
switch (mVUbranch) {
case 3: branchCase(JZ32); // IBEQ
case 4: branchCase(JGE32); // IBGEZ
@ -295,29 +300,43 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
case 7: branchCase(JL32); // IBLTZ
case 8: branchCase(JNZ32); // IBNEQ
case 1: case 2: // B/BAL
// ToDo: search for block
// (remember about global variables and recursion!)
mVUsetFlagsRec<vuIndex>(bStatus, bMac);
ajmp = JMP32((uptr)0);
break;
incPC(-2); // Go back to branch opcode (to get branch imm addr)
mVUsetupBranch<vuIndex>(bStatus, bMac);
// Check if branch-block has already been compiled
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) {
ajmp = JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5));
mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
}
else {
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add block
if (!vuIndex) mVUcompileVU0(branchAddr, (uptr)&pBlock->pState);
else mVUcompileVU1(branchAddr, (uptr)&pBlock->pState);
}
//incPC(+2);
return thisPtr;
case 9: case 10: // JR/JALR
mVUsetFlagsRec<vuIndex>(bStatus, bMac);
mVUsetupBranch<vuIndex>(bStatus, bMac);
PUSH32R(gprR); // Backup EDX
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
AND32ItoR(gprT2, (vuIndex) ? 0x3ff8 : 0xff8);
MOV32ItoR(gprR, (u32)&pblock->pState); // Get pState (EDX second argument for __fastcall)
//MOV32MtoR(gprT1, (uptr)&mVUcurProg.x86ptr); // Get last x86ptr for this program
//MOV32RtoM((uptr)&x86Ptr, gprT1); // Setup x86Ptr to write to correct address
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
MOV32ItoR(gprR, (u32)&pBlock->pState); // Get pState (EDX second argument for __fastcall)
//ToDo: Add block to block manager and use its address instead of pblock!
if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
else CALLFunc((uptr)mVUcompileVU1);
POP32R(gprR); // Restore
POP32R(gprR); // Restore EDX
JMPR(gprT1); // Jump to rec-code address
break;
return thisPtr;
}
//mVUcurProg.x86Ptr
return thisPtr;
}
}

View File

@ -26,8 +26,7 @@
microVUt(void) mVUdispatcherA() {
static u32 PCSX2_ALIGNED16(vuMXCSR);
microVU* mVU = mVUx;
x86SetPtr(mVU->ptr);
mVU->startFunct = mVU->ptr;
mVU->startFunct = x86Ptr;
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); }
@ -67,15 +66,13 @@ microVUt(void) mVUdispatcherA() {
// Jump to Recompiled Code Block
JMPR(EAX);
mVU->ptr = x86Ptr;
}
// Generates the code to exit from recompiled blocks
microVUt(void) mVUdispatcherB() {
static u32 PCSX2_ALIGNED16(eeMXCSR);
microVU* mVU = mVUx;
x86SetPtr(mVU->ptr);
mVU->exitFunct = mVU->ptr;
mVU->exitFunct = x86Ptr;
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); }
@ -111,8 +108,7 @@ microVUt(void) mVUdispatcherB() {
EMMS();
RET();
mVU->ptr = x86Ptr;
mVUcachCheck(mVU->cache, 512);
mVUcacheCheck(x86Ptr, mVU->cache, 512);
}
//------------------------------------------------------------------
@ -121,23 +117,16 @@ microVUt(void) mVUdispatcherB() {
// Executes for number of cycles
microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
/*
Pseudocode: (ToDo: implement # of cycles)
1) Search for existing program
2) If program not found, goto 5
3) Search for recompiled block
4) If recompiled block found, goto 6
5) Recompile as much blocks as possible
6) Return start execution address of block
*/
microVU* mVU = mVUx;
mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles);
if ( mVUsearchProg(mVU) ) { // Found Program
//microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState);
//if (block) return block->x86ptrStart; // Found Block
}
// Recompile code
return NULL;
// ToDo: Implement Cycles
mVUsearchProg(mVU); // Find and set correct program
x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off
if (!vuIndex) return mVUcompileVU0(startPC, (uptr)&mVU->prog.lpState);
else return mVUcompileVU1(startPC, (uptr)&mVU->prog.lpState);
}
//------------------------------------------------------------------
@ -146,8 +135,8 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
microVUt(void) mVUcleanUp() {
microVU* mVU = mVUx;
mVU->ptr = mVUcurProg.x86ptr;
mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
mVUcurProg.x86ptr = x86Ptr;
mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
}
//------------------------------------------------------------------

View File

@ -139,12 +139,13 @@ declareAllVariables
#define microVUq(aType) template<int vuIndex, int recPass> __forceinline aType
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
#define mVUblock mVU->prog.prog[mVU->prog.cur].block
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
#define mVUbranch mVUallocInfo.branch
#define mVUcycles mVUallocInfo.cycles
#define mVUcount mVUallocInfo.count
#define mVUregs mVUallocInfo.regs
#define mVUblock mVUallocInfo.block
#define mVUregs mVUallocInfo.block.pState
#define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC
#define mVUinfo mVUallocInfo.info[iPC / 2]
@ -157,6 +158,9 @@ declareAllVariables
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
#define incCycles(x) { mVUincCycles<vuIndex>(x); }
#define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
#define branchAddr ((xPC + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
// PSHUFD immediate used to reorder xmmPQ when a block ends on a non-zero P/Q instance.
// High nibble: 0xb0 swaps the upper two dwords when mVU->p is set, 0xe0 keeps them in place.
// Low nibble:  0x01 swaps the lower two dwords when mVU->q is set, 0x04 keeps them in place.
// With both indices clear the result is 0xe4 (identity shuffle).
// Fix: the high-pair selector previously tested mVU->q, so mVU->p was never consulted.
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
#define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch
@ -235,7 +239,7 @@ declareAllVariables
#define mVUdebug1() {}
#endif
#define mVUcachCheck(start, limit) { \
uptr diff = mVU->ptr - start; \
#define mVUcacheCheck(ptr, start, limit) { \
uptr diff = ptr - start; \
if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \
}

View File

@ -17,8 +17,6 @@
*/
#pragma once
#include "microVU_Upper.inl"
#include "microVU_Lower.inl"
#ifdef PCSX2_MICROVU
//------------------------------------------------------------------