mirror of https://github.com/PCSX2/pcsx2.git
microVU: Work in Progress commit, just committing to have a backup!
- Added Simple Constant Propagation to detect Constant Indirect Jump addresses, allowing them to act as normal branches. (speedup) - Added a Pipeline State optimization to remove some unnecessary information. (speedup) - Severely altered mVU's memory model to dynamically allocate memory based on how many VU programs are run (and free it when they are dead). - Made microPrograms recompile to a global rec-cache instead of a per-program cache. - Raised VU1's microprogram slots from 64 to 400. - Fixed some memory leaks that were causing RAM usage to increase over time. - W.I.P. GoW speed hack (not yet in the GUI) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1453 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
fba2e4519d
commit
d9b5baf8f6
|
@ -48,11 +48,19 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) {
|
|||
mVU->cache = NULL;
|
||||
mVU->cacheSize = mVUcacheSize;
|
||||
mVU->prog.max = mMaxProg - 1;
|
||||
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
|
||||
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
|
||||
|
||||
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
||||
if (!mVU->cache) throw Exception::OutOfMemory(fmt_string("microVU Error: Failed to allocate recompiler memory!"));
|
||||
|
||||
|
||||
memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000);
|
||||
memset(mVU->prog.prog, 0, sizeof(microProgram)*(mVU->prog.max+1));
|
||||
|
||||
// Setup Entrance/Exit Points
|
||||
x86SetPtr(mVU->cache);
|
||||
mVUdispatcherA(mVU);
|
||||
mVUdispatcherB(mVU);
|
||||
mVUemitSearch();
|
||||
mVUreset(mVU);
|
||||
}
|
||||
|
@ -63,16 +71,8 @@ microVUt(void) mVUreset(mV) {
|
|||
mVUprint((mVU->index) ? "microVU1: reset" : "microVU0: reset");
|
||||
mVUclose(mVU, 1);
|
||||
|
||||
// Dynarec Cache
|
||||
memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000);
|
||||
|
||||
// Setup Entrance/Exit Points
|
||||
x86SetPtr(mVU->cache);
|
||||
mVUdispatcherA(mVU);
|
||||
mVUdispatcherB(mVU);
|
||||
|
||||
// Clear All Program Data
|
||||
memset(&mVU->prog, 0, sizeof(mVU->prog));
|
||||
//memset(&mVU->prog, 0, sizeof(mVU->prog));
|
||||
memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState));
|
||||
|
||||
// Program Variables
|
||||
|
@ -81,17 +81,14 @@ microVUt(void) mVUreset(mV) {
|
|||
mVU->prog.cur = -1;
|
||||
mVU->prog.total = -1;
|
||||
mVU->prog.max = mMaxProg - 1;
|
||||
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
|
||||
|
||||
memset( mVU->prog.prog, 0, sizeof(microProgram)*(mVU->prog.max+1) );
|
||||
|
||||
// Setup Dynarec Cache Limits for Each Program
|
||||
u8* z = (mVU->cache + 0x1000); // Dispatcher Code is in first page of cache
|
||||
mVU->prog.x86start = z;
|
||||
mVU->prog.x86ptr = z;
|
||||
mVU->prog.x86end = (u8*)((uptr)z + (uptr)(mVU->cacheSize - (mVU->cacheSize*.05)));
|
||||
|
||||
for (int i = 0; i <= mVU->prog.max; i++) {
|
||||
mVU->prog.prog[i].x86start = z;
|
||||
mVU->prog.prog[i].x86ptr = z;
|
||||
z += (mVU->cacheSize / (mVU->prog.max + 1));
|
||||
mVU->prog.prog[i].x86end = z;
|
||||
for (int j = 0; j <= mVU->prog.prog[i].ranges.max; j++) {
|
||||
mVU->prog.prog[i].ranges.range[j][0] = -1; // Set range to
|
||||
mVU->prog.prog[i].ranges.range[j][1] = -1; // indeterminable status
|
||||
|
@ -114,7 +111,7 @@ microVUt(void) mVUclose(mV, bool isReset) {
|
|||
microBlockManager::Delete(mVU->prog.prog[i].block[j]);
|
||||
}
|
||||
}
|
||||
if (!isReset) { _aligned_free(mVU->prog.prog); }
|
||||
if (!isReset) safe_aligned_free(mVU->prog.prog);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,14 +132,15 @@ microVUf(void) mVUclearProg(int progIndex) {
|
|||
microVU* mVU = mVUx;
|
||||
mVUprogI.used = 1;
|
||||
mVUprogI.last_used = 3;
|
||||
mVUprogI.x86ptr = mVUprogI.x86start;
|
||||
//mVUprogI.x86ptr = mVUprogI.x86start;
|
||||
for (int j = 0; j <= mVUprogI.ranges.max; j++) {
|
||||
mVUprogI.ranges.range[j][0] = -1; // Set range to
|
||||
mVUprogI.ranges.range[j][1] = -1; // indeterminable status
|
||||
mVUprogI.ranges.total = -1;
|
||||
}
|
||||
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
|
||||
if (mVUprogI.block[i]) mVUprogI.block[i]->reset();
|
||||
//if (mVUprogI.block[i]) { mVUprogI.block[i]->reset(); }
|
||||
microBlockManager::Delete(mVUprogI.block[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -153,6 +151,7 @@ microVUf(void) mVUcacheProg(int progIndex) {
|
|||
mVUdumpProg(progIndex);
|
||||
}
|
||||
|
||||
#define aWrap(x, nMax) ((x > nMax) ? 0 : x)
|
||||
// Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program)
|
||||
microVUf(int) mVUfindLeastUsedProg() {
|
||||
microVU* mVU = mVUx;
|
||||
|
@ -165,22 +164,48 @@ microVUf(int) mVUfindLeastUsedProg() {
|
|||
return mVU->prog.total;
|
||||
}
|
||||
else {
|
||||
|
||||
const int pMax = mVU->prog.max;
|
||||
int smallidx = (mVU->prog.cur+1)&pMax;
|
||||
u64 smallval = mVU->prog.prog[smallidx].used;
|
||||
/*
|
||||
const int pMax = mVU->prog.max;
|
||||
int smallidx = aWrap((mVU->prog.cur+1), pMax);
|
||||
u64 smallval = mVU->prog.prog[smallidx].used;
|
||||
|
||||
for (int i = 1, j = (smallidx+1)&pMax; i <= pMax; i++, j=(j+1)&pMax) {
|
||||
for (int i = 1, j = aWrap((smallidx+1), pMax); i <= pMax; i++, aWrap((j+1), pMax)) {
|
||||
if (smallval > mVU->prog.prog[j].used) {
|
||||
smallval = mVU->prog.prog[j].used;
|
||||
smallidx = j;
|
||||
}
|
||||
}
|
||||
|
||||
//smallidx = rand() % 200;
|
||||
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
|
||||
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
|
||||
//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval);
|
||||
|
||||
return smallidx;
|
||||
*/
|
||||
|
||||
/*
|
||||
static int smallidx = 0;
|
||||
const int pMax = mVU->prog.max;
|
||||
smallidx = aWrap((smallidx+1), pMax);
|
||||
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
|
||||
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
|
||||
//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval);
|
||||
return smallidx;
|
||||
*/
|
||||
|
||||
//mVUreset(mVU);
|
||||
mVU->prog.x86ptr = mVU->prog.x86start;
|
||||
for (int z = 0; z <= mVU->prog.max; z++) {
|
||||
mVUclearProg<vuIndex>(z);
|
||||
mVU->prog.prog[z].used = 0;
|
||||
mVU->prog.prog[z].last_used = 0;
|
||||
}
|
||||
mVU->prog.total = 0;
|
||||
mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program
|
||||
mVU->prog.prog[mVU->prog.total].used = 1;
|
||||
mVU->prog.prog[mVU->prog.total].last_used = 3;
|
||||
Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1);
|
||||
return mVU->prog.total;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,57 +27,75 @@
|
|||
#include "microVU_IR.h"
|
||||
#include "microVU_Misc.h"
|
||||
|
||||
// Node of a singly linked list of microBlock pointers. microBlockManager
// embeds the head node as a member and, each time it adds a block, stores the
// block in the current tail node and allocates a new tail node after it.
struct microBlockLink {
|
||||
microBlock* block; // Block held by this node
|
||||
microBlockLink* next; // Following node in the list
|
||||
};
|
||||
|
||||
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
|
||||
class microBlockManager {
|
||||
private:
|
||||
static const int MaxBlocks = mMaxBlocks - 1;
|
||||
microBlock blockList[mMaxBlocks]; // Should always be first in the class to ensure 16-byte alignment
|
||||
int listSize; // Total Items - 1
|
||||
int listI; // Index to Add new block
|
||||
microBlockLink blockList;
|
||||
microBlockLink* blockEnd;
|
||||
int listI;
|
||||
|
||||
public:
|
||||
// Aligned replacement for 'new'
|
||||
static microBlockManager* AlignedNew() {
|
||||
microBlockManager* alloc = (microBlockManager*)_aligned_malloc(sizeof(microBlockManager), 16);
|
||||
new (alloc) microBlockManager();
|
||||
return alloc;
|
||||
}
|
||||
// Use instead of normal 'delete'
|
||||
static void Delete(microBlockManager* dead) {
|
||||
if (dead == NULL) return;
|
||||
static void Delete(microBlockManager* &dead) {
|
||||
if (!dead) return;
|
||||
dead->~microBlockManager();
|
||||
_aligned_free(dead);
|
||||
safe_delete(dead);
|
||||
}
|
||||
|
||||
microBlockManager() { reset(); }
|
||||
~microBlockManager() {}
|
||||
void reset() { listSize = -1; listI = -1; };
|
||||
microBlockManager() {
|
||||
listI = -1;
|
||||
blockList.block = NULL;
|
||||
blockList.next = NULL;
|
||||
blockEnd = &blockList;
|
||||
}
|
||||
~microBlockManager() { reset(); }
|
||||
void reset() {
|
||||
if (listI >= 0) {
|
||||
microBlockLink* linkI = &blockList;
|
||||
microBlockLink* linkD = NULL;
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
safe_aligned_free(linkI->block);
|
||||
linkI = linkI->next;
|
||||
safe_delete(linkD);
|
||||
linkD = linkI;
|
||||
}
|
||||
safe_delete(linkI);
|
||||
}
|
||||
listI = -1;
|
||||
blockEnd = &blockList;
|
||||
};
|
||||
microBlock* add(microBlock* pBlock) {
|
||||
microBlock* thisBlock = search(&pBlock->pState);
|
||||
if (!thisBlock) {
|
||||
listI++;
|
||||
if (listSize < MaxBlocks) { listSize++; }
|
||||
if (listI > MaxBlocks) { Console::Error("microVU Warning: Block List Overflow"); listI = 0; }
|
||||
memcpy_fast(&blockList[listI], pBlock, sizeof(microBlock));
|
||||
thisBlock = &blockList[listI];
|
||||
blockEnd->block = (microBlock*)_aligned_malloc(sizeof(microBlock), 16);
|
||||
blockEnd->next = new microBlockLink;
|
||||
memcpy_fast(blockEnd->block, pBlock, sizeof(microBlock));
|
||||
thisBlock = blockEnd->block;
|
||||
blockEnd = blockEnd->next;
|
||||
}
|
||||
return thisBlock;
|
||||
}
|
||||
__forceinline microBlock* search(microRegInfo* pState) {
|
||||
if (listSize < 0) return NULL;
|
||||
__releaseinline microBlock* search(microRegInfo* pState) {
|
||||
microBlockLink* linkI = &blockList;
|
||||
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
for (int i = 0; i <= listSize; i++) {
|
||||
if (mVUquickSearch((void*)pState, (void*)&blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block;
|
||||
linkI = linkI->next;
|
||||
}
|
||||
}
|
||||
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
|
||||
for (int i = 0; i <= listSize; i++) {
|
||||
if ((blockList[i].pState.q == pState->q)
|
||||
&& (blockList[i].pState.p == pState->p)
|
||||
&& (blockList[i].pState.xgkick == pState->xgkick)
|
||||
&& (blockList[i].pState.flags == pState->flags)
|
||||
&& !(blockList[i].pState.needExactMatch & 0xf0f)) { return &blockList[i]; }
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
if ((linkI->block->pState.q == pState->q)
|
||||
&& (linkI->block->pState.p == pState->p)
|
||||
&& (linkI->block->pState.vi15 == pState->vi15)
|
||||
&& (linkI->block->pState.flags == pState->flags)
|
||||
&& (linkI->block->pState.xgkick == pState->xgkick)
|
||||
&& !(linkI->block->pState.needExactMatch & 0xf0f)) { return linkI->block; }
|
||||
linkI = linkI->next;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
@ -95,27 +113,27 @@ struct microRange {
|
|||
struct microProgram {
|
||||
u32 data [mProgSize]; // Holds a copy of the VU microProgram
|
||||
microBlockManager* block[mProgSize/2]; // Array of Block Managers
|
||||
microIR<mProgSize> allocInfo; // IR information
|
||||
microRange ranges; // The ranges of the microProgram that have already been recompiled
|
||||
u64 used; // Number of times its been used
|
||||
u32 last_used; // Counters # of frames since last use (starts at 3 and counts backwards to 0 for each 30fps vSync)
|
||||
u8* x86ptr; // Pointer to program's recompilation code
|
||||
u8* x86start; // Start of program's rec-cache
|
||||
u8* x86end; // Limit of program's rec-cache
|
||||
};
|
||||
|
||||
#define mMaxProg ((mVU->index)?64:8) // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
|
||||
#define mMaxProg ((mVU->index)?400:8) // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
|
||||
struct microProgManager {
|
||||
microProgram* prog; // Store MicroPrograms in memory
|
||||
int max; // Max Number of MicroPrograms minus 1
|
||||
int total; // Total Number of valid MicroPrograms minus 1
|
||||
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
|
||||
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
|
||||
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
|
||||
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
|
||||
microIR<mProgSize> allocInfo; // IR information
|
||||
microProgram* prog; // Store MicroPrograms in memory
|
||||
int max; // Max Number of MicroPrograms minus 1
|
||||
int total; // Total Number of valid MicroPrograms minus 1
|
||||
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
|
||||
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
|
||||
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
|
||||
u8* x86ptr; // Pointer to program's recompilation code
|
||||
u8* x86start; // Start of program's rec-cache
|
||||
u8* x86end; // Limit of program's rec-cache
|
||||
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
|
||||
};
|
||||
|
||||
#define mVUcacheSize (mMaxProg * 0xCCCCC) // 0.8mb per program
|
||||
#define mVUcacheSize (mMaxProg * (0x100000 * 0.5)) // 0.5mb per program
|
||||
struct microVU {
|
||||
|
||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||
|
|
|
@ -102,6 +102,7 @@
|
|||
// Writing to a VI reg
|
||||
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
|
||||
if (xReg) { \
|
||||
mVUconstReg[xReg].isValid = 0; \
|
||||
mVUregsTemp.VIreg = xReg; \
|
||||
mVUregsTemp.VI = aCycles; \
|
||||
viWrite.reg = xReg; \
|
||||
|
@ -114,6 +115,7 @@
|
|||
#define analyzeRreg() { mVUregsTemp.r = 1; }
|
||||
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
|
||||
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
|
||||
#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } }
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC1 - Normal FMAC Opcodes
|
||||
|
@ -173,6 +175,11 @@ microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
|
|||
analyzeVIreg2(It, mVUlow.VI_write, 1);
|
||||
}
|
||||
|
||||
// Pass-1 analysis used by the IADDI and IADDIU opcodes: performs the regular
// integer-ALU register analysis for Is/It and then, when the source index Is
// is 0, records It as holding the known constant 'imm' in the const-reg table.
// NOTE(review): presumably Is == 0 means the source reads as zero, so the
// result equals the immediate — confirm against the VU register model.
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm) {
|
||||
mVUanalyzeIALU2(mVU, Is, It);
|
||||
// Must come after the call above: its analyzeVIreg2() step clears It's const-valid flag.
if (!Is) { setConstReg(It, imm); }
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MR32 - MR32 Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
@ -386,3 +393,16 @@ microVUt(void) mVUanalyzeBranch2(mV, int Is, int It) {
|
|||
analyzeBranchVI(mVU, It, mVUlow.memReadIt);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass-1 analysis for the JR/JALR opcodes (isJALR selects the JALR handling).
// If the jump register Is currently has a valid entry in the const-reg table
// (and CHECK_VU_CONSTHACK is off), its value is copied into this instruction's
// constJump info so the jump target is known at compile time. Is is then
// recorded as a VI read; for JALR, It is also recorded as a VI write and
// marked as holding the constant bSaveAddr.
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
|
||||
// Sample Is's const info first: analysing the write to It below clears It's
// const-valid flag, which would lose the value when Is and It are the same reg.
if (mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK) {
|
||||
mVUlow.constJump.isValid = 1;
|
||||
mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
|
||||
//DevCon::Status("microVU%d: Constant JR/JALR Address Optimization", params mVU->index);
|
||||
}
|
||||
analyzeVIreg1(Is, mVUlow.VI_read[0]);
|
||||
if (isJALR) {
|
||||
analyzeVIreg2(It, mVUlow.VI_write, 1);
|
||||
// Re-validate It after analyzeVIreg2() invalidated it: it now holds bSaveAddr.
setConstReg(It, bSaveAddr);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@
|
|||
}
|
||||
|
||||
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
|
||||
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
|
||||
#define tCycles(dest, src) { dest = aMax(dest, src); }
|
||||
#define incP() { mVU->p = (mVU->p+1) & 1; }
|
||||
#define incQ() { mVU->q = (mVU->q+1) & 1; }
|
||||
|
@ -69,7 +70,7 @@
|
|||
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
|
||||
#define doSwapOp() { doBackupVF1(); mVUopL(mVU, 1); doBackupVF2(); incPC(1); doUpperOp(); doBackupVF3(); }
|
||||
#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
|
||||
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = microBlockManager::AlignedNew(); }
|
||||
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager();/*microBlockManager::AlignedNew();*/ }
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Helper Functions
|
||||
|
@ -144,6 +145,20 @@ microVUt(void) mVUsetupRange(mV, s32 pc, bool isStartPC) {
|
|||
}
|
||||
}
|
||||
|
||||
// Optimizes the End Pipeline State Removing Unnecessary Info.
// Applies optimizeReg() to each x/y/z/w component of the 32 VF entries and to
// the 16 VI entries of mVUregs; optimizeReg() rewrites a value of exactly 1 to
// 0 and leaves any other value untouched. The 'r' field is always cleared.
// NOTE(review): presumably a remaining count of 1 can no longer stall the next
// block, so dropping it lets more pipeline states match — confirm.
|
||||
microVUt(void) mVUoptimizePipeState(mV) {
|
||||
for (int i = 0; i < 32; i++) {
|
||||
optimizeReg(mVUregs.VF[i].x);
|
||||
optimizeReg(mVUregs.VF[i].y);
|
||||
optimizeReg(mVUregs.VF[i].z);
|
||||
optimizeReg(mVUregs.VF[i].w);
|
||||
}
|
||||
for (int i = 0; i < 16; i++) {
|
||||
optimizeReg(mVUregs.VI[i]);
|
||||
}
|
||||
mVUregs.r = 0;
|
||||
}
|
||||
|
||||
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
|
||||
microVUt(void) mVUsetupBranch(mV, int* xStatus, int* xMac, int* xClip, int xCycles) {
|
||||
mVUprint("mVUsetupBranch");
|
||||
|
@ -324,6 +339,15 @@ microVUt(void) mVUtestCycles(mV) {
|
|||
x86SetJ32(jmp32);
|
||||
}
|
||||
|
||||
// Resets the constant-propagation table before a block is analysed: all 16
// entries are marked invalid with value 0, then entry 15 is seeded from the
// vi15 field of the incoming pipeline state (mVUregs).
microVUt(void) mVUinitConstValues(mV) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
mVUconstReg[i].isValid = 0;
|
||||
mVUconstReg[i].regValue = 0;
|
||||
}
|
||||
// vi15 packs its const info as: bit 31 = valid flag, low 16 bits = value.
mVUconstReg[15].isValid = mVUregs.vi15 >> 31;
|
||||
mVUconstReg[15].regValue = mVUconstReg[15].isValid ? (mVUregs.vi15&0xffff) : 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Recompiler
|
||||
//------------------------------------------------------------------
|
||||
|
@ -349,12 +373,14 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
|||
mVU->q = 0; // All blocks start at q index #0
|
||||
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
|
||||
mVUblock.x86ptrStart = thisPtr;
|
||||
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
|
||||
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
|
||||
mVUpBlock = pBlock;
|
||||
mVUregs.flags = 0;
|
||||
mVUflagInfo = 0;
|
||||
mVUsFlagHack = CHECK_VU_FLAGHACK;
|
||||
|
||||
mVUinitConstValues(mVU);
|
||||
|
||||
for (int branch = 0; mVUcount < endCount; mVUcount++) {
|
||||
incPC(1);
|
||||
startLoop();
|
||||
|
@ -381,6 +407,12 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
|||
int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip);
|
||||
mVUtestCycles(mVU);
|
||||
|
||||
// Fix up vi15 const info for propagation through blocks
|
||||
mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
|
||||
|
||||
// Optimize the End Pipeline State for nicer Block Linking
|
||||
mVUoptimizePipeState(mVU);
|
||||
|
||||
// Second Pass
|
||||
iPC = mVUstartPC;
|
||||
setCode();
|
||||
|
@ -419,14 +451,31 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
|||
// Check if branch-block has already been compiled
|
||||
blockCreate(branchAddr/8);
|
||||
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
|
||||
if (pBlock) { xJMP(pBlock->x86ptrStart); }
|
||||
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
|
||||
if (pBlock) { xJMP(pBlock->x86ptrStart); }
|
||||
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
|
||||
return thisPtr;
|
||||
case 9: case 10: // JR/JALR
|
||||
|
||||
mVUprint("mVUcompile JR/JALR");
|
||||
incPC(-3); // Go back to jump opcode
|
||||
|
||||
if (mVUlow.constJump.isValid) {
|
||||
if (mVUup.eBit) { // E-bit Jump
|
||||
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
|
||||
mVUendProgram(mVU, 1, xStatus, xMac, xClip);
|
||||
}
|
||||
else {
|
||||
int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8);
|
||||
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
|
||||
// Check if jump-to-block has already been compiled
|
||||
blockCreate(jumpAddr/8);
|
||||
pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs);
|
||||
if (pBlock) { xJMP(pBlock->x86ptrStart); }
|
||||
else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); }
|
||||
}
|
||||
return thisPtr;
|
||||
}
|
||||
|
||||
if (mVUup.eBit) { // E-bit Jump
|
||||
mVUendProgram(mVU, 2, xStatus, xMac, xClip);
|
||||
MOV32MtoR(gprT1, (uptr)&mVU->branch);
|
||||
|
|
|
@ -127,7 +127,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
|
|||
mVU->cycles = cycles;
|
||||
mVU->totalCycles = cycles;
|
||||
|
||||
x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off
|
||||
x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
|
||||
return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
|
||||
}
|
||||
|
||||
|
@ -140,8 +140,8 @@ microVUx(void) mVUcleanUp() {
|
|||
//mVUprint("microVU: Program exited successfully!");
|
||||
//mVUprint("microVU: VF0 = {%x,%x,%x,%x}", params mVU->regs->VF[0].UL[0], mVU->regs->VF[0].UL[1], mVU->regs->VF[0].UL[2], mVU->regs->VF[0].UL[3]);
|
||||
//mVUprint("microVU: VI0 = %x", params mVU->regs->VI[0].UL);
|
||||
mVUcurProg.x86ptr = x86Ptr;
|
||||
mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
|
||||
mVU->prog.x86ptr = x86Ptr;
|
||||
mVUcacheCheck(x86Ptr, mVU->prog.x86start, (uptr)(mVU->prog.x86end - mVU->prog.x86start));
|
||||
mVU->cycles = mVU->totalCycles - mVU->cycles;
|
||||
mVU->regs->cycle += mVU->cycles;
|
||||
cpuRegs.cycle += ((mVU->cycles < 3000) ? mVU->cycles : 3000) * Config.Hacks.VUCycleSteal;
|
||||
|
|
|
@ -265,7 +265,10 @@ void mVUflagPass(mV, u32 startPC, u32 xCount) {
|
|||
// Checks if the first 4 instructions of a block will read flags
|
||||
microVUt(void) mVUsetFlagInfo(mV) {
|
||||
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr, 4); incPC(1); }
|
||||
branchType2 { mVUflagInfo |= 0xfff; }
|
||||
branchType2 {
|
||||
if (!mVUlow.constJump.isValid) { mVUflagInfo |= 0xfff; }
|
||||
else { mVUflagPass(mVU, mVUlow.constJump.regValue, 4); }
|
||||
}
|
||||
branchType3 {
|
||||
incPC(-1);
|
||||
mVUflagPass(mVU, branchAddr, 4);
|
||||
|
|
|
@ -35,6 +35,7 @@ union regInfo {
|
|||
|
||||
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
|
@ -42,7 +43,7 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
|||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 padding[7]; // 160 bytes
|
||||
u8 padding[3]; // 160 bytes
|
||||
#if defined(_MSC_VER)
|
||||
};
|
||||
#else
|
||||
|
@ -84,6 +85,11 @@ struct microVIreg {
|
|||
u8 used; // Reg is Used? (Read/Written)
|
||||
};
|
||||
|
||||
// Simple constant-propagation record: a validity flag plus the constant value.
// regValue is only meaningful while isValid is non-zero.
struct microConstInfo {
|
||||
u8 isValid; // Is the constant in regValue valid?
|
||||
u32 regValue; // Constant Value
|
||||
};
|
||||
|
||||
struct microUpperOp {
|
||||
bool eBit; // Has E-bit set
|
||||
bool iBit; // Has I-bit set
|
||||
|
@ -93,10 +99,11 @@ struct microUpperOp {
|
|||
};
|
||||
|
||||
struct microLowerOp {
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
microVIreg VI_write; // VI reg written to by this instruction
|
||||
microVIreg VI_read[2]; // VI regs read by this instruction
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
microVIreg VI_write; // VI reg written to by this instruction
|
||||
microVIreg VI_read[2]; // VI regs read by this instruction
|
||||
microConstInfo constJump; // Constant Reg Info for JR/JARL instructions
|
||||
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
|
||||
bool isNOP; // This instruction is a NOP
|
||||
bool isFSSET; // This instruction is a FSSET
|
||||
|
@ -137,10 +144,11 @@ struct microOp {
|
|||
|
||||
template<u32 pSize>
|
||||
struct microIR {
|
||||
microBlock block; // Block/Pipeline info
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||
microBlock block; // Block/Pipeline info
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||
microOp info[pSize/2]; // Info for Instructions in current block
|
||||
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
|
||||
u8 branch;
|
||||
u32 cycles; // Cycles for current block
|
||||
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
||||
|
|
|
@ -612,7 +612,7 @@ mVUop(mVU_IADD) {
|
|||
}
|
||||
|
||||
mVUop(mVU_IADDI) {
|
||||
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
|
||||
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
|
||||
pass2 {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
ADD16ItoR(gprT1, _Imm5_);
|
||||
|
@ -622,7 +622,7 @@ mVUop(mVU_IADDI) {
|
|||
}
|
||||
|
||||
mVUop(mVU_IADDIU) {
|
||||
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
|
||||
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
|
||||
pass2 {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
ADD16ItoR(gprT1, _Imm15_);
|
||||
|
@ -1127,7 +1127,7 @@ mVUop(mVU_B) {
|
|||
|
||||
mVUop(mVU_BAL) {
|
||||
setBranchA(2, _It_);
|
||||
pass1 { analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
||||
pass1 { analyzeVIreg2(_It_, mVUlow.VI_write, 1); setConstReg(_It_, bSaveAddr); }
|
||||
pass2 {
|
||||
MOV32ItoR(gprT1, bSaveAddr);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
|
@ -1207,24 +1207,28 @@ mVUop(mVU_IBNE) {
|
|||
|
||||
mVUop(mVU_JR) {
|
||||
mVUbranch = 9;
|
||||
pass1 { analyzeVIreg1(_Is_, mVUlow.VI_read[0]); }
|
||||
pass1 { mVUanalyzeJump(mVU, _Is_, 0, 0); }
|
||||
pass2 {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
SHL32ItoR(gprT1, 3);
|
||||
AND32ItoR(gprT1, isVU1 ? 0x3ff8 : 0xff8);
|
||||
MOV32RtoM((uptr)&mVU->branch, gprT1);
|
||||
if (!mVUlow.constJump.isValid) {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
SHL32ItoR(gprT1, 3);
|
||||
AND32ItoR(gprT1, mVU->microMemSize - 8);
|
||||
MOV32RtoM((uptr)&mVU->branch, gprT1);
|
||||
}
|
||||
}
|
||||
pass3 { mVUlog("JR [vi%02d]", _Fs_); }
|
||||
}
|
||||
|
||||
mVUop(mVU_JALR) {
|
||||
mVUbranch = 10;
|
||||
pass1 { analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
||||
pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); }
|
||||
pass2 {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
SHL32ItoR(gprT1, 3);
|
||||
AND32ItoR(gprT1, isVU1 ? 0x3ff8 : 0xff8);
|
||||
MOV32RtoM((uptr)&mVU->branch, gprT1);
|
||||
if (!mVUlow.constJump.isValid) {
|
||||
mVUallocVIa(mVU, gprT1, _Is_);
|
||||
SHL32ItoR(gprT1, 3);
|
||||
AND32ItoR(gprT1, mVU->microMemSize - 8);
|
||||
MOV32RtoM((uptr)&mVU->branch, gprT1);
|
||||
}
|
||||
MOV32ItoR(gprT1, bSaveAddr);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
|
|
|
@ -187,7 +187,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#define mVUprogI mVU->prog.prog[progIndex]
|
||||
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
|
||||
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block
|
||||
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
|
||||
#define mVUallocInfo mVU->prog.allocInfo
|
||||
#define mVUbranch mVUallocInfo.branch
|
||||
#define mVUcycles mVUallocInfo.cycles
|
||||
#define mVUcount mVUallocInfo.count
|
||||
|
@ -198,6 +198,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#define iPC mVUallocInfo.curPC
|
||||
#define mVUsFlagHack mVUallocInfo.sFlagHack
|
||||
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
||||
#define mVUconstReg mVUallocInfo.constReg
|
||||
#define mVUstall mVUinfo.stall
|
||||
#define mVUup mVUinfo.uOp
|
||||
#define mVUlow mVUinfo.lOp
|
||||
|
@ -212,7 +213,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#define setCode() { mVU->code = curI; }
|
||||
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
||||
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
|
||||
#define bSaveAddr (((xPC + (2 * 8)) & ((isVU1) ? 0x3ff8:0xff8)) / 8)
|
||||
#define bSaveAddr (((xPC + 16) & (mVU->microMemSize-8)) / 8)
|
||||
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & (mVU->microMemSize-8))
|
||||
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
|
||||
#define cmpOffset(x) ((u8*)&(((u8*)x)[mVUprogI.ranges.range[i][0]]))
|
||||
|
@ -272,6 +273,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#endif
|
||||
|
||||
// Speed Hacks
|
||||
#define CHECK_VU_CONSTHACK 0 // Only use for GoW (will be slower on other games)
|
||||
#define CHECK_VU_FLAGHACK (u32)Config.Hacks.vuFlagHack // (Can cause Infinite loops, SPS, etc...)
|
||||
#define CHECK_VU_MINMAXHACK (u32)Config.Hacks.vuMinMax // (Can cause SPS, Black Screens, etc...)
|
||||
|
||||
|
@ -282,12 +284,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
|
||||
|
||||
// Cache Limit Check
|
||||
#define mVUcacheCheck(ptr, start, limit) { \
|
||||
uptr diff = ptr - start; \
|
||||
if (diff >= limit) { \
|
||||
Console::Error("microVU Error: Program went over its cache limit. Size = 0x%x", params diff); \
|
||||
mVUreset(mVU); \
|
||||
} \
|
||||
#define mVUcacheCheck(ptr, start, limit) { \
|
||||
uptr diff = ptr - start; \
|
||||
if (diff >= limit) { \
|
||||
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", params mVU->index, diff); \
|
||||
mVUreset(mVU); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define mVUdebugNOW(isEndPC) { \
|
||||
|
|
Loading…
Reference in New Issue