microVU: coded a speedhack that optimizes out all status flag updates in a microprogram if the entire program doesn't read the status flags.

FFX's intro got an 8~10% speedup with the hack, and it should be safe for 90~95% of games.
It can be enabled in microVU_Misc.h (set CHECK_VU_FLAGHACK to 1).


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1167 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-05-11 06:15:16 +00:00
parent d19229d334
commit ef01d581b0
9 changed files with 37 additions and 8 deletions

View File

@ -128,6 +128,7 @@ microVUt(void) mVUclear(u32 addr, u32 size) {
microVUt(void) mVUclearProg(int progIndex) { microVUt(void) mVUclearProg(int progIndex) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
mVU->prog.prog[progIndex].used = 1; mVU->prog.prog[progIndex].used = 1;
mVU->prog.prog[progIndex].sFlagHack = 0;
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
for (u32 i = 0; i < (mVU->progSize / 2); i++) { for (u32 i = 0; i < (mVU->progSize / 2); i++) {
mVU->prog.prog[progIndex].block[i]->reset(); mVU->prog.prog[progIndex].block[i]->reset();
@ -139,6 +140,7 @@ microVUt(void) mVUcacheProg(int progIndex) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
memcpy_fast(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize); memcpy_fast(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize);
mVUdumpProg(progIndex); mVUdumpProg(progIndex);
mVUcheckSflag<vuIndex>(progIndex);
} }
// Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program) // Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program)

View File

@ -68,6 +68,7 @@ template<u32 progSize>
struct microProgram { struct microProgram {
u32 data[progSize/4]; u32 data[progSize/4];
u32 used; // Number of times its been used u32 used; // Number of times its been used
u32 sFlagHack; // Optimize out Status Flag Updates if Program doesn't use Status Flags
u8* x86ptr; // Pointer to program's recompilation code u8* x86ptr; // Pointer to program's recompilation code
u8* x86start; // Start of program's rec-cache u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache u8* x86end; // Limit of program's rec-cache

View File

@ -75,7 +75,7 @@ struct microAllocInfo {
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block u32 startPC; // Start PC for Cur Block
u32 flagInfo; // Holds information to help with flag instances on block linking u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
u32 info[pSize/8]; // Info for Instructions in current block u32 info[pSize/8]; // Info for Instructions in current block
u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
}; };

View File

@ -118,10 +118,12 @@ microVUt(void) mVUendProgram(int fStatus, int fMac, int fClip) {
} }
// Save Flag Instances // Save Flag Instances
if (!mVUflagHack) {
getFlagReg(fStatus, fStatus); getFlagReg(fStatus, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
}
mVUallocMFLAGa<vuIndex>(gprT1, fMac); mVUallocMFLAGa<vuIndex>(gprT1, fMac);
mVUallocCFLAGa<vuIndex>(gprT2, fClip); mVUallocCFLAGa<vuIndex>(gprT2, fClip);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1); MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2); MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);

View File

@ -151,7 +151,7 @@ microVUt(int) mVUsetFlags(int* xStatus, int* xMac, int* xClip) {
microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) { microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (__Status) { if (__Status && !mVUflagHack) {
int bStatus[4]; int bStatus[4];
sortFlag(xStatus, bStatus, cycles); sortFlag(xStatus, bStatus, cycles);
PUSH32R(gprR); // Backup gprR PUSH32R(gprR); // Backup gprR

View File

@ -551,7 +551,7 @@ microVUf(void) mVU_FSAND() {
mVUallocVIb<vuIndex>(gprT1, _It_); mVUallocVIb<vuIndex>(gprT1, _It_);
} }
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); } pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
} }
microVUf(void) mVU_FSEQ() { microVUf(void) mVU_FSEQ() {
@ -565,7 +565,7 @@ microVUf(void) mVU_FSEQ() {
mVUallocVIb<vuIndex>(gprT1, _It_); mVUallocVIb<vuIndex>(gprT1, _It_);
} }
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); } pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
} }
microVUf(void) mVU_FSOR() { microVUf(void) mVU_FSOR() {
@ -577,7 +577,7 @@ microVUf(void) mVU_FSOR() {
mVUallocVIb<vuIndex>(gprT1, _It_); mVUallocVIb<vuIndex>(gprT1, _It_);
} }
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); SysPrintf("b\n"); } pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
} }
microVUf(void) mVU_FSSET() { microVUf(void) mVU_FSSET() {
@ -591,6 +591,7 @@ microVUf(void) mVU_FSSET() {
OR16ItoR (flagReg1, (_Imm12_ & 0xfc0)); OR16ItoR (flagReg1, (_Imm12_ & 0xfc0));
} }
pass3 { mVUlog("FSSET $%x", _Imm12_); } pass3 { mVUlog("FSSET $%x", _Imm12_); }
pass4 { mVUsFlagHack = 0; }
} }
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -160,7 +160,8 @@ declareAllVariables
#define mVUregs mVUallocInfo.block.pState #define mVUregs mVUallocInfo.block.pState
#define mVUregsTemp mVUallocInfo.regsTemp #define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC #define iPC mVUallocInfo.curPC
#define mVUflagInfo mVUregs.needExactMatch //mVUallocInfo.flagInfo #define mVUflagInfo mVUregs.needExactMatch
#define mVUsFlagHack mVUallocInfo.sFlagHack
#define mVUinfo mVUallocInfo.info[iPC / 2] #define mVUinfo mVUallocInfo.info[iPC / 2]
#define mVUstall mVUallocInfo.stall[iPC / 2] #define mVUstall mVUallocInfo.stall[iPC / 2]
#define mVUstartPC mVUallocInfo.startPC #define mVUstartPC mVUallocInfo.startPC
@ -286,6 +287,10 @@ declareAllVariables
#define mVUdumpProg 0&& #define mVUdumpProg 0&&
#endif #endif
// Status Flag Speed Hack
#define CHECK_VU_FLAGHACK 0 // Set to 1 to turn hack on
#define mVUflagHack (mVUcurProg.sFlagHack)
// Cache Limit Check // Cache Limit Check
#define mVUcacheCheck(ptr, start, limit) { \ #define mVUcacheCheck(ptr, start, limit) { \
uptr diff = ptr - start; \ uptr diff = ptr - start; \

View File

@ -299,4 +299,21 @@ microVUt(void) mVUrestoreRegs() {
POP32R(gprR); // Restore EDX POP32R(gprR); // Restore EDX
} }
// Scans a whole cached microprogram and records whether the Status Flag
// speed hack may be used for it.
//   progIndex - index into mVU->prog.prog[] of the program to scan.
// Compiles down to a no-op unless CHECK_VU_FLAGHACK is non-zero.
microVUt(void) mVUcheckSflag(int progIndex) {
	if (!CHECK_VU_FLAGHACK) return;

	microVU* mVU = mVUx;
	u32* progData = mVU->prog.prog[progIndex].data;

	// Start out assuming the hack is usable; the opcode handlers invoked below
	// are expected to reset mVUsFlagHack to 0 when they touch the status flag
	// (NOTE(review): presumably template argument 3 selects the "pass4" section
	// of each handler -- confirm against the pass macros).
	mVUsFlagHack = 1;

	// Instructions are stored as pairs of 32-bit words: the word at pc+1 is fed
	// to the upper-opcode dispatcher first, then the word at pc to the lower one.
	for (u32 pc = 0; pc < mVU->progSize; pc += 2) {
		mVU->code = progData[pc + 1];
		mVUopU<vuIndex, 3>();
		mVU->code = progData[pc];
		mVUopL<vuIndex, 3>();
	}

	// Clear mVUflagInfo again so whatever the scan wrote into it is discarded.
	mVUflagInfo = 0;

	// Store the verdict on the scanned program itself.
	mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack;
}
#endif //PCSX2_MICROVU #endif //PCSX2_MICROVU

View File

@ -35,6 +35,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("doStatus = %d; doMac = %d\n", doStatus>>9, doMac>>8); //SysPrintf("doStatus = %d; doMac = %d\n", doStatus>>9, doMac>>8);
if (mVUflagHack) { mVUinfo &= ~_doStatus; }
if (!doFlags) return; if (!doFlags) return;
if (!doMac) { regT1 = reg; } if (!doMac) { regT1 = reg; }
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw