microVU: Work in Progress commit, just committing to have a backup!

- Added Simple Constant Propagation to detect Constant Indirect Jump addresses allowing them to act as normal branches. (speedup)
- Added a Pipeline State optimization to remove some unnecessary information. (speedup)
- Severely altered mVU's memory model to dynamically allocate memory based on how many VU programs are run (and to free each program's memory once it is dead).
- Made microPrograms recompile to a global rec-cache instead of per-program cache.
- Raised VU1's microprogram slots from 64 to 400.
- Fixed some memory leaks that were causing RAM usage to increase over time.
- W.I.P. GoW speed hack (not yet in gui)


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1453 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-07-02 23:57:09 +00:00
parent fba2e4519d
commit d9b5baf8f6
9 changed files with 237 additions and 108 deletions

View File

@ -48,11 +48,19 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) {
mVU->cache = NULL;
mVU->cacheSize = mVUcacheSize;
mVU->prog.max = mMaxProg - 1;
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if (!mVU->cache) throw Exception::OutOfMemory(fmt_string("microVU Error: Failed to allocate recompiler memory!"));
memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000);
memset(mVU->prog.prog, 0, sizeof(microProgram)*(mVU->prog.max+1));
// Setup Entrance/Exit Points
x86SetPtr(mVU->cache);
mVUdispatcherA(mVU);
mVUdispatcherB(mVU);
mVUemitSearch();
mVUreset(mVU);
}
@ -63,16 +71,8 @@ microVUt(void) mVUreset(mV) {
mVUprint((mVU->index) ? "microVU1: reset" : "microVU0: reset");
mVUclose(mVU, 1);
// Dynarec Cache
memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000);
// Setup Entrance/Exit Points
x86SetPtr(mVU->cache);
mVUdispatcherA(mVU);
mVUdispatcherB(mVU);
// Clear All Program Data
memset(&mVU->prog, 0, sizeof(mVU->prog));
//memset(&mVU->prog, 0, sizeof(mVU->prog));
memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState));
// Program Variables
@ -81,17 +81,14 @@ microVUt(void) mVUreset(mV) {
mVU->prog.cur = -1;
mVU->prog.total = -1;
mVU->prog.max = mMaxProg - 1;
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
memset( mVU->prog.prog, 0, sizeof(microProgram)*(mVU->prog.max+1) );
// Setup Dynarec Cache Limits for Each Program
u8* z = (mVU->cache + 0x1000); // Dispatcher Code is in first page of cache
mVU->prog.x86start = z;
mVU->prog.x86ptr = z;
mVU->prog.x86end = (u8*)((uptr)z + (uptr)(mVU->cacheSize - (mVU->cacheSize*.05)));
for (int i = 0; i <= mVU->prog.max; i++) {
mVU->prog.prog[i].x86start = z;
mVU->prog.prog[i].x86ptr = z;
z += (mVU->cacheSize / (mVU->prog.max + 1));
mVU->prog.prog[i].x86end = z;
for (int j = 0; j <= mVU->prog.prog[i].ranges.max; j++) {
mVU->prog.prog[i].ranges.range[j][0] = -1; // Set range to
mVU->prog.prog[i].ranges.range[j][1] = -1; // indeterminable status
@ -114,7 +111,7 @@ microVUt(void) mVUclose(mV, bool isReset) {
microBlockManager::Delete(mVU->prog.prog[i].block[j]);
}
}
if (!isReset) { _aligned_free(mVU->prog.prog); }
if (!isReset) safe_aligned_free(mVU->prog.prog);
}
}
@ -135,14 +132,15 @@ microVUf(void) mVUclearProg(int progIndex) {
microVU* mVU = mVUx;
mVUprogI.used = 1;
mVUprogI.last_used = 3;
mVUprogI.x86ptr = mVUprogI.x86start;
//mVUprogI.x86ptr = mVUprogI.x86start;
for (int j = 0; j <= mVUprogI.ranges.max; j++) {
mVUprogI.ranges.range[j][0] = -1; // Set range to
mVUprogI.ranges.range[j][1] = -1; // indeterminable status
mVUprogI.ranges.total = -1;
}
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
if (mVUprogI.block[i]) mVUprogI.block[i]->reset();
//if (mVUprogI.block[i]) { mVUprogI.block[i]->reset(); }
microBlockManager::Delete(mVUprogI.block[i]);
}
}
@ -153,6 +151,7 @@ microVUf(void) mVUcacheProg(int progIndex) {
mVUdumpProg(progIndex);
}
#define aWrap(x, nMax) ((x > nMax) ? 0 : x)
// Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program)
microVUf(int) mVUfindLeastUsedProg() {
microVU* mVU = mVUx;
@ -165,22 +164,48 @@ microVUf(int) mVUfindLeastUsedProg() {
return mVU->prog.total;
}
else {
const int pMax = mVU->prog.max;
int smallidx = (mVU->prog.cur+1)&pMax;
u64 smallval = mVU->prog.prog[smallidx].used;
/*
const int pMax = mVU->prog.max;
int smallidx = aWrap((mVU->prog.cur+1), pMax);
u64 smallval = mVU->prog.prog[smallidx].used;
for (int i = 1, j = (smallidx+1)&pMax; i <= pMax; i++, j=(j+1)&pMax) {
for (int i = 1, j = aWrap((smallidx+1), pMax); i <= pMax; i++, aWrap((j+1), pMax)) {
if (smallval > mVU->prog.prog[j].used) {
smallval = mVU->prog.prog[j].used;
smallidx = j;
}
}
//smallidx = rand() % 200;
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval);
return smallidx;
*/
/*
static int smallidx = 0;
const int pMax = mVU->prog.max;
smallidx = aWrap((smallidx+1), pMax);
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval);
return smallidx;
*/
//mVUreset(mVU);
mVU->prog.x86ptr = mVU->prog.x86start;
for (int z = 0; z <= mVU->prog.max; z++) {
mVUclearProg<vuIndex>(z);
mVU->prog.prog[z].used = 0;
mVU->prog.prog[z].last_used = 0;
}
mVU->prog.total = 0;
mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program
mVU->prog.prog[mVU->prog.total].used = 1;
mVU->prog.prog[mVU->prog.total].last_used = 3;
Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1);
return mVU->prog.total;
}
}

View File

@ -27,57 +27,75 @@
#include "microVU_IR.h"
#include "microVU_Misc.h"
// Node of the singly linked list that microBlockManager stores its blocks in.
struct microBlockLink {
microBlock* block; // Block held by this node (microBlockManager::add allocates it with _aligned_malloc)
microBlockLink* next; // Following node in the list (microBlockManager::add allocates it with new)
};
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
class microBlockManager {
private:
static const int MaxBlocks = mMaxBlocks - 1;
microBlock blockList[mMaxBlocks]; // Should always be first in the class to ensure 16-byte alignment
int listSize; // Total Items - 1
int listI; // Index to Add new block
microBlockLink blockList;
microBlockLink* blockEnd;
int listI;
public:
// Aligned replacement for 'new'
static microBlockManager* AlignedNew() {
microBlockManager* alloc = (microBlockManager*)_aligned_malloc(sizeof(microBlockManager), 16);
new (alloc) microBlockManager();
return alloc;
}
// Use instead of normal 'delete'
static void Delete(microBlockManager* dead) {
if (dead == NULL) return;
static void Delete(microBlockManager* &dead) {
if (!dead) return;
dead->~microBlockManager();
_aligned_free(dead);
safe_delete(dead);
}
microBlockManager() { reset(); }
~microBlockManager() {}
void reset() { listSize = -1; listI = -1; };
microBlockManager() {
listI = -1;
blockList.block = NULL;
blockList.next = NULL;
blockEnd = &blockList;
}
~microBlockManager() { reset(); }
void reset() {
if (listI >= 0) {
microBlockLink* linkI = &blockList;
microBlockLink* linkD = NULL;
for (int i = 0; i <= listI; i++) {
safe_aligned_free(linkI->block);
linkI = linkI->next;
safe_delete(linkD);
linkD = linkI;
}
safe_delete(linkI);
}
listI = -1;
blockEnd = &blockList;
};
microBlock* add(microBlock* pBlock) {
microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock) {
listI++;
if (listSize < MaxBlocks) { listSize++; }
if (listI > MaxBlocks) { Console::Error("microVU Warning: Block List Overflow"); listI = 0; }
memcpy_fast(&blockList[listI], pBlock, sizeof(microBlock));
thisBlock = &blockList[listI];
blockEnd->block = (microBlock*)_aligned_malloc(sizeof(microBlock), 16);
blockEnd->next = new microBlockLink;
memcpy_fast(blockEnd->block, pBlock, sizeof(microBlock));
thisBlock = blockEnd->block;
blockEnd = blockEnd->next;
}
return thisBlock;
}
__forceinline microBlock* search(microRegInfo* pState) {
if (listSize < 0) return NULL;
__releaseinline microBlock* search(microRegInfo* pState) {
microBlockLink* linkI = &blockList;
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i <= listSize; i++) {
if (mVUquickSearch((void*)pState, (void*)&blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
for (int i = 0; i <= listI; i++) {
if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block;
linkI = linkI->next;
}
}
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
for (int i = 0; i <= listSize; i++) {
if ((blockList[i].pState.q == pState->q)
&& (blockList[i].pState.p == pState->p)
&& (blockList[i].pState.xgkick == pState->xgkick)
&& (blockList[i].pState.flags == pState->flags)
&& !(blockList[i].pState.needExactMatch & 0xf0f)) { return &blockList[i]; }
for (int i = 0; i <= listI; i++) {
if ((linkI->block->pState.q == pState->q)
&& (linkI->block->pState.p == pState->p)
&& (linkI->block->pState.vi15 == pState->vi15)
&& (linkI->block->pState.flags == pState->flags)
&& (linkI->block->pState.xgkick == pState->xgkick)
&& !(linkI->block->pState.needExactMatch & 0xf0f)) { return linkI->block; }
linkI = linkI->next;
}
}
return NULL;
@ -95,27 +113,27 @@ struct microRange {
struct microProgram {
u32 data [mProgSize]; // Holds a copy of the VU microProgram
microBlockManager* block[mProgSize/2]; // Array of Block Managers
microIR<mProgSize> allocInfo; // IR information
microRange ranges; // The ranges of the microProgram that have already been recompiled
u64 used; // Number of times its been used
u32 last_used; // Counters # of frames since last use (starts at 3 and counts backwards to 0 for each 30fps vSync)
u8* x86ptr; // Pointer to program's recompilation code
u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache
};
#define mMaxProg ((mVU->index)?64:8) // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
#define mMaxProg ((mVU->index)?400:8) // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
struct microProgManager {
microProgram* prog; // Store MicroPrograms in memory
int max; // Max Number of MicroPrograms minus 1
int total; // Total Number of valid MicroPrograms minus 1
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
microIR<mProgSize> allocInfo; // IR information
microProgram* prog; // Store MicroPrograms in memory
int max; // Max Number of MicroPrograms minus 1
int total; // Total Number of valid MicroPrograms minus 1
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
u8* x86ptr; // Pointer to program's recompilation code
u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
};
#define mVUcacheSize (mMaxProg * 0xCCCCC) // 0.8mb per program
#define mVUcacheSize (mMaxProg * 0x80000) // 0.5mb (0x80000 bytes) per program; kept as integer arithmetic so the size is never a floating-point value
struct microVU {
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)

View File

@ -102,6 +102,7 @@
// Writing to a VI reg
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
if (xReg) { \
mVUconstReg[xReg].isValid = 0; \
mVUregsTemp.VIreg = xReg; \
mVUregsTemp.VI = aCycles; \
viWrite.reg = xReg; \
@ -114,6 +115,7 @@
#define analyzeRreg() { mVUregsTemp.r = 1; }
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } }
//------------------------------------------------------------------
// FMAC1 - Normal FMAC Opcodes
@ -173,6 +175,11 @@ microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
analyzeVIreg2(It, mVUlow.VI_write, 1);
}
// Pass-1 analysis shared by IADDI and IADDIU.
// Runs the regular IALU2 analysis first; afterwards, if the source register index is 0,
// the immediate is recorded as the known constant of It (setConstReg ignores It == 0).
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm) {
	mVUanalyzeIALU2(mVU, Is, It);
	const bool srcIsReg0 = (Is == 0);
	if (srcIsReg0) {
		setConstReg(It, imm);
	}
}
//------------------------------------------------------------------
// MR32 - MR32 Opcode
//------------------------------------------------------------------
@ -386,3 +393,16 @@ microVUt(void) mVUanalyzeBranch2(mV, int Is, int It) {
analyzeBranchVI(mVU, It, mVUlow.memReadIt);
}
}
// Pass-1 analysis for JR/JALR (isJALR selects the JALR behaviour).
// If the jump-address register Is currently has a known constant (and CHECK_VU_CONSTHACK is off),
// that constant is copied into this instruction's constJump info.
// For JALR, It is additionally analyzed as a VI write and recorded as holding the constant bSaveAddr.
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
if (mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK) {
mVUlow.constJump.isValid = 1;
mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
//DevCon::Status("microVU%d: Constant JR/JALR Address Optimization", params mVU->index);
}
analyzeVIreg1(Is, mVUlow.VI_read[0]);
if (isJALR) {
// The const info of It is only replaced here, after the constant of Is was captured above,
// so a JALR with Is == It still takes its target from the register's previous value.
analyzeVIreg2(It, mVUlow.VI_write, 1);
setConstReg(It, bSaveAddr);
}
}

View File

@ -62,6 +62,7 @@
}
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
#define tCycles(dest, src) { dest = aMax(dest, src); }
#define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; }
@ -69,7 +70,7 @@
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
#define doSwapOp() { doBackupVF1(); mVUopL(mVU, 1); doBackupVF2(); incPC(1); doUpperOp(); doBackupVF3(); }
#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = microBlockManager::AlignedNew(); }
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager();/*microBlockManager::AlignedNew();*/ }
//------------------------------------------------------------------
// Helper Functions
@ -144,6 +145,20 @@ microVUt(void) mVUsetupRange(mV, s32 pc, bool isStartPC) {
}
}
// Trims the end-of-block pipeline state: every VF component and every VI entry that is
// exactly 1 is reset to 0 (all other values are kept as they are), and the r entry is cleared.
microVUt(void) mVUoptimizePipeState(mV) {
	for (int vf = 31; vf >= 0; vf--) {
		optimizeReg(mVUregs.VF[vf].x);
		optimizeReg(mVUregs.VF[vf].y);
		optimizeReg(mVUregs.VF[vf].z);
		optimizeReg(mVUregs.VF[vf].w);
	}
	for (int vi = 15; vi >= 0; vi--) {
		optimizeReg(mVUregs.VI[vi]);
	}
	mVUregs.r = 0;
}
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
microVUt(void) mVUsetupBranch(mV, int* xStatus, int* xMac, int* xClip, int xCycles) {
mVUprint("mVUsetupBranch");
@ -324,6 +339,15 @@ microVUt(void) mVUtestCycles(mV) {
x86SetJ32(jmp32);
}
// Resets the constant-propagation table for the 16 VI registers.
// All entries start out as "not a known constant"; entry 15 is then seeded from mVUregs.vi15,
// whose top bit marks a valid constant and whose low 16 bits carry the value.
microVUt(void) mVUinitConstValues(mV) {
	for (int reg = 15; reg >= 0; reg--) {
		mVUconstReg[reg].regValue = 0;
		mVUconstReg[reg].isValid = 0;
	}
	const u32 packedVi15 = mVUregs.vi15;
	if (packedVi15 >> 31) {
		mVUconstReg[15].isValid = 1;
		mVUconstReg[15].regValue = packedVi15 & 0xffff;
	}
}
//------------------------------------------------------------------
// Recompiler
//------------------------------------------------------------------
@ -349,12 +373,14 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
mVU->q = 0; // All blocks start at q index #0
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
mVUblock.x86ptrStart = thisPtr;
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
mVUpBlock = pBlock;
mVUregs.flags = 0;
mVUflagInfo = 0;
mVUsFlagHack = CHECK_VU_FLAGHACK;
mVUinitConstValues(mVU);
for (int branch = 0; mVUcount < endCount; mVUcount++) {
incPC(1);
startLoop();
@ -381,6 +407,12 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip);
mVUtestCycles(mVU);
// Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
// Optimize the End Pipeline State for nicer Block Linking
mVUoptimizePipeState(mVU);
// Second Pass
iPC = mVUstartPC;
setCode();
@ -419,14 +451,31 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
// Check if branch-block has already been compiled
blockCreate(branchAddr/8);
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, branchAddr, (uptr)&mVUregs); }
return thisPtr;
case 9: case 10: // JR/JALR
mVUprint("mVUcompile JR/JALR");
incPC(-3); // Go back to jump opcode
if (mVUlow.constJump.isValid) {
if (mVUup.eBit) { // E-bit Jump
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
mVUendProgram(mVU, 1, xStatus, xMac, xClip);
}
else {
int jumpAddr = (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8);
mVUsetupBranch(mVU, xStatus, xMac, xClip, xCycles);
// Check if jump-to-block has already been compiled
blockCreate(jumpAddr/8);
pBlock = mVUblocks[jumpAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { xJMP(pBlock->x86ptrStart); }
else { mVUcompile(mVU, jumpAddr, (uptr)&mVUregs); }
}
return thisPtr;
}
if (mVUup.eBit) { // E-bit Jump
mVUendProgram(mVU, 2, xStatus, xMac, xClip);
MOV32MtoR(gprT1, (uptr)&mVU->branch);

View File

@ -127,7 +127,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
mVU->cycles = cycles;
mVU->totalCycles = cycles;
x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off
x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
}
@ -140,8 +140,8 @@ microVUx(void) mVUcleanUp() {
//mVUprint("microVU: Program exited successfully!");
//mVUprint("microVU: VF0 = {%x,%x,%x,%x}", params mVU->regs->VF[0].UL[0], mVU->regs->VF[0].UL[1], mVU->regs->VF[0].UL[2], mVU->regs->VF[0].UL[3]);
//mVUprint("microVU: VI0 = %x", params mVU->regs->VI[0].UL);
mVUcurProg.x86ptr = x86Ptr;
mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
mVU->prog.x86ptr = x86Ptr;
mVUcacheCheck(x86Ptr, mVU->prog.x86start, (uptr)(mVU->prog.x86end - mVU->prog.x86start));
mVU->cycles = mVU->totalCycles - mVU->cycles;
mVU->regs->cycle += mVU->cycles;
cpuRegs.cycle += ((mVU->cycles < 3000) ? mVU->cycles : 3000) * Config.Hacks.VUCycleSteal;

View File

@ -265,7 +265,10 @@ void mVUflagPass(mV, u32 startPC, u32 xCount) {
// Checks if the first 4 instructions of a block will read flags
microVUt(void) mVUsetFlagInfo(mV) {
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr, 4); incPC(1); }
branchType2 { mVUflagInfo |= 0xfff; }
branchType2 {
if (!mVUlow.constJump.isValid) { mVUflagInfo |= 0xfff; }
else { mVUflagPass(mVU, mVUlow.constJump.regValue, 4); }
}
branchType3 {
incPC(-1);
mVUflagPass(mVU, branchAddr, 4);

View File

@ -35,6 +35,7 @@ union regInfo {
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
u32 needExactMatch; // If set, block needs an exact match of pipeline state
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
u8 q;
u8 p;
u8 r;
@ -42,7 +43,7 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
u8 VI[16];
regInfo VF[32];
u8 flags; // clip x2 :: status x2
u8 padding[7]; // 160 bytes
u8 padding[3]; // 160 bytes
#if defined(_MSC_VER)
};
#else
@ -84,6 +85,11 @@ struct microVIreg {
u8 used; // Reg is Used? (Read/Written)
};
// Simple constant-propagation record: a validity flag plus the constant it refers to.
// Used per VI register (microIR::constReg) and per instruction for JR/JALR targets (microLowerOp::constJump).
struct microConstInfo {
u8 isValid; // Is the constant in regValue valid?
u32 regValue; // Constant Value (only meaningful while isValid is set)
};
struct microUpperOp {
bool eBit; // Has E-bit set
bool iBit; // Has I-bit set
@ -93,10 +99,11 @@ struct microUpperOp {
};
struct microLowerOp {
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction
microVIreg VI_read[2]; // VI regs read by this instruction
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction
microVIreg VI_read[2]; // VI regs read by this instruction
microConstInfo constJump; // Constant Reg Info for JR/JALR instructions
u32 branch; // Branch Type (0 = Not a Branch, 1 = B, 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR)
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
@ -137,10 +144,11 @@ struct microOp {
template<u32 pSize>
struct microIR {
microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
microOp info[pSize/2]; // Info for Instructions in current block
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
u8 branch;
u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)

View File

@ -612,7 +612,7 @@ mVUop(mVU_IADD) {
}
mVUop(mVU_IADDI) {
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
pass2 {
mVUallocVIa(mVU, gprT1, _Is_);
ADD16ItoR(gprT1, _Imm5_);
@ -622,7 +622,7 @@ mVUop(mVU_IADDI) {
}
mVUop(mVU_IADDIU) {
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
pass2 {
mVUallocVIa(mVU, gprT1, _Is_);
ADD16ItoR(gprT1, _Imm15_);
@ -1127,7 +1127,7 @@ mVUop(mVU_B) {
mVUop(mVU_BAL) {
setBranchA(2, _It_);
pass1 { analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
pass1 { analyzeVIreg2(_It_, mVUlow.VI_write, 1); setConstReg(_It_, bSaveAddr); }
pass2 {
MOV32ItoR(gprT1, bSaveAddr);
mVUallocVIb(mVU, gprT1, _It_);
@ -1207,24 +1207,28 @@ mVUop(mVU_IBNE) {
mVUop(mVU_JR) {
mVUbranch = 9;
pass1 { analyzeVIreg1(_Is_, mVUlow.VI_read[0]); }
pass1 { mVUanalyzeJump(mVU, _Is_, 0, 0); }
pass2 {
mVUallocVIa(mVU, gprT1, _Is_);
SHL32ItoR(gprT1, 3);
AND32ItoR(gprT1, isVU1 ? 0x3ff8 : 0xff8);
MOV32RtoM((uptr)&mVU->branch, gprT1);
if (!mVUlow.constJump.isValid) {
mVUallocVIa(mVU, gprT1, _Is_);
SHL32ItoR(gprT1, 3);
AND32ItoR(gprT1, mVU->microMemSize - 8);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
}
pass3 { mVUlog("JR [vi%02d]", _Fs_); }
}
mVUop(mVU_JALR) {
mVUbranch = 10;
pass1 { analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); }
pass2 {
mVUallocVIa(mVU, gprT1, _Is_);
SHL32ItoR(gprT1, 3);
AND32ItoR(gprT1, isVU1 ? 0x3ff8 : 0xff8);
MOV32RtoM((uptr)&mVU->branch, gprT1);
if (!mVUlow.constJump.isValid) {
mVUallocVIa(mVU, gprT1, _Is_);
SHL32ItoR(gprT1, 3);
AND32ItoR(gprT1, mVU->microMemSize - 8);
MOV32RtoM((uptr)&mVU->branch, gprT1);
}
MOV32ItoR(gprT1, bSaveAddr);
mVUallocVIb(mVU, gprT1, _It_);
}

View File

@ -187,7 +187,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#define mVUprogI mVU->prog.prog[progIndex]
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
#define mVUallocInfo mVU->prog.allocInfo
#define mVUbranch mVUallocInfo.branch
#define mVUcycles mVUallocInfo.cycles
#define mVUcount mVUallocInfo.count
@ -198,6 +198,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#define iPC mVUallocInfo.curPC
#define mVUsFlagHack mVUallocInfo.sFlagHack
#define mVUinfo mVUallocInfo.info[iPC / 2]
#define mVUconstReg mVUallocInfo.constReg
#define mVUstall mVUinfo.stall
#define mVUup mVUinfo.uOp
#define mVUlow mVUinfo.lOp
@ -212,7 +213,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
#define bSaveAddr (((xPC + (2 * 8)) & ((isVU1) ? 0x3ff8:0xff8)) / 8)
#define bSaveAddr (((xPC + 16) & (mVU->microMemSize-8)) / 8)
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & (mVU->microMemSize-8))
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
#define cmpOffset(x) ((u8*)&(((u8*)x)[mVUprogI.ranges.range[i][0]]))
@ -272,6 +273,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#endif
// Speed Hacks
#define CHECK_VU_CONSTHACK 0 // Only use for GoW (will be slower on other games)
#define CHECK_VU_FLAGHACK (u32)Config.Hacks.vuFlagHack // (Can cause Infinite loops, SPS, etc...)
#define CHECK_VU_MINMAXHACK (u32)Config.Hacks.vuMinMax // (Can cause SPS, Black Screens, etc...)
@ -282,12 +284,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// Cache Limit Check
#define mVUcacheCheck(ptr, start, limit) { \
uptr diff = ptr - start; \
if (diff >= limit) { \
Console::Error("microVU Error: Program went over its cache limit. Size = 0x%x", params diff); \
mVUreset(mVU); \
} \
#define mVUcacheCheck(ptr, start, limit) { \
uptr diff = ptr - start; \
if (diff >= limit) { \
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", params mVU->index, diff); \
mVUreset(mVU); \
} \
}
#define mVUdebugNOW(isEndPC) { \