mirror of https://github.com/PCSX2/pcsx2.git
microVU: coded a speedhack that optimizes out all status flag updates in a microprogram if the entire program doesn't read the status flags.
FFX's intro got an 8~10% speedup with the hack, and it should be safe for 90~95% of games. It can be enabled in microVU_Misc.h by setting CHECK_VU_FLAGHACK to 1. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1167 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d19229d334
commit
ef01d581b0
|
@ -128,6 +128,7 @@ microVUt(void) mVUclear(u32 addr, u32 size) {
|
||||||
microVUt(void) mVUclearProg(int progIndex) {
|
microVUt(void) mVUclearProg(int progIndex) {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
mVU->prog.prog[progIndex].used = 1;
|
mVU->prog.prog[progIndex].used = 1;
|
||||||
|
mVU->prog.prog[progIndex].sFlagHack = 0;
|
||||||
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
|
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
|
||||||
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
|
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
|
||||||
mVU->prog.prog[progIndex].block[i]->reset();
|
mVU->prog.prog[progIndex].block[i]->reset();
|
||||||
|
@ -139,6 +140,7 @@ microVUt(void) mVUcacheProg(int progIndex) {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
memcpy_fast(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize);
|
memcpy_fast(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize);
|
||||||
mVUdumpProg(progIndex);
|
mVUdumpProg(progIndex);
|
||||||
|
mVUcheckSflag<vuIndex>(progIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program)
|
// Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program)
|
||||||
|
|
|
@ -68,6 +68,7 @@ template<u32 progSize>
|
||||||
struct microProgram {
|
struct microProgram {
|
||||||
u32 data[progSize/4];
|
u32 data[progSize/4];
|
||||||
u32 used; // Number of times its been used
|
u32 used; // Number of times its been used
|
||||||
|
u32 sFlagHack; // Optimize out Status Flag Updates if Program doesn't use Status Flags
|
||||||
u8* x86ptr; // Pointer to program's recompilation code
|
u8* x86ptr; // Pointer to program's recompilation code
|
||||||
u8* x86start; // Start of program's rec-cache
|
u8* x86start; // Start of program's rec-cache
|
||||||
u8* x86end; // Limit of program's rec-cache
|
u8* x86end; // Limit of program's rec-cache
|
||||||
|
|
|
@ -75,7 +75,7 @@ struct microAllocInfo {
|
||||||
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
||||||
u32 curPC; // Current PC
|
u32 curPC; // Current PC
|
||||||
u32 startPC; // Start PC for Cur Block
|
u32 startPC; // Start PC for Cur Block
|
||||||
u32 flagInfo; // Holds information to help with flag instances on block linking
|
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
||||||
u32 info[pSize/8]; // Info for Instructions in current block
|
u32 info[pSize/8]; // Info for Instructions in current block
|
||||||
u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
|
u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
|
||||||
};
|
};
|
||||||
|
|
|
@ -118,10 +118,12 @@ microVUt(void) mVUendProgram(int fStatus, int fMac, int fClip) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save Flag Instances
|
// Save Flag Instances
|
||||||
getFlagReg(fStatus, fStatus);
|
if (!mVUflagHack) {
|
||||||
|
getFlagReg(fStatus, fStatus);
|
||||||
|
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
|
||||||
|
}
|
||||||
mVUallocMFLAGa<vuIndex>(gprT1, fMac);
|
mVUallocMFLAGa<vuIndex>(gprT1, fMac);
|
||||||
mVUallocCFLAGa<vuIndex>(gprT2, fClip);
|
mVUallocCFLAGa<vuIndex>(gprT2, fClip);
|
||||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
|
|
||||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
|
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprT1);
|
||||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
|
MOV32RtoM((uptr)&mVU->regs->VI[REG_CLIP_FLAG].UL, gprT2);
|
||||||
|
|
||||||
|
|
|
@ -151,7 +151,7 @@ microVUt(int) mVUsetFlags(int* xStatus, int* xMac, int* xClip) {
|
||||||
microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
|
microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
|
|
||||||
if (__Status) {
|
if (__Status && !mVUflagHack) {
|
||||||
int bStatus[4];
|
int bStatus[4];
|
||||||
sortFlag(xStatus, bStatus, cycles);
|
sortFlag(xStatus, bStatus, cycles);
|
||||||
PUSH32R(gprR); // Backup gprR
|
PUSH32R(gprR); // Backup gprR
|
||||||
|
|
|
@ -551,7 +551,7 @@ microVUf(void) mVU_FSAND() {
|
||||||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
|
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUf(void) mVU_FSEQ() {
|
microVUf(void) mVU_FSEQ() {
|
||||||
|
@ -565,7 +565,7 @@ microVUf(void) mVU_FSEQ() {
|
||||||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); }
|
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUf(void) mVU_FSOR() {
|
microVUf(void) mVU_FSOR() {
|
||||||
|
@ -577,7 +577,7 @@ microVUf(void) mVU_FSOR() {
|
||||||
mVUallocVIb<vuIndex>(gprT1, _It_);
|
mVUallocVIb<vuIndex>(gprT1, _It_);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); SysPrintf("b\n"); }
|
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUf(void) mVU_FSSET() {
|
microVUf(void) mVU_FSSET() {
|
||||||
|
@ -591,6 +591,7 @@ microVUf(void) mVU_FSSET() {
|
||||||
OR16ItoR (flagReg1, (_Imm12_ & 0xfc0));
|
OR16ItoR (flagReg1, (_Imm12_ & 0xfc0));
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSSET $%x", _Imm12_); }
|
pass3 { mVUlog("FSSET $%x", _Imm12_); }
|
||||||
|
pass4 { mVUsFlagHack = 0; }
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
|
@ -160,7 +160,8 @@ declareAllVariables
|
||||||
#define mVUregs mVUallocInfo.block.pState
|
#define mVUregs mVUallocInfo.block.pState
|
||||||
#define mVUregsTemp mVUallocInfo.regsTemp
|
#define mVUregsTemp mVUallocInfo.regsTemp
|
||||||
#define iPC mVUallocInfo.curPC
|
#define iPC mVUallocInfo.curPC
|
||||||
#define mVUflagInfo mVUregs.needExactMatch //mVUallocInfo.flagInfo
|
#define mVUflagInfo mVUregs.needExactMatch
|
||||||
|
#define mVUsFlagHack mVUallocInfo.sFlagHack
|
||||||
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
||||||
#define mVUstall mVUallocInfo.stall[iPC / 2]
|
#define mVUstall mVUallocInfo.stall[iPC / 2]
|
||||||
#define mVUstartPC mVUallocInfo.startPC
|
#define mVUstartPC mVUallocInfo.startPC
|
||||||
|
@ -286,6 +287,10 @@ declareAllVariables
|
||||||
#define mVUdumpProg 0&&
|
#define mVUdumpProg 0&&
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Status Flag Speed Hack
// CHECK_VU_FLAGHACK is the compile-time switch: mVUcheckSflag only scans a
// program when it is non-zero, so with the value 0 that function never
// raises a program's sFlagHack field.
|
||||||
|
#define CHECK_VU_FLAGHACK 0 // Set to 1 to turn hack on
|
||||||
|
// mVUflagHack reads the sFlagHack field of mVUcurProg. When it is non-zero,
// mVUendProgram skips saving the status flag, mVUsetupFlags skips its status
// flag block, and mVUupdateFlags clears _doStatus from mVUinfo.
#define mVUflagHack (mVUcurProg.sFlagHack)
|
||||||
|
|
||||||
// Cache Limit Check
|
// Cache Limit Check
|
||||||
#define mVUcacheCheck(ptr, start, limit) { \
|
#define mVUcacheCheck(ptr, start, limit) { \
|
||||||
uptr diff = ptr - start; \
|
uptr diff = ptr - start; \
|
||||||
|
|
|
@ -299,4 +299,21 @@ microVUt(void) mVUrestoreRegs() {
|
||||||
POP32R(gprR); // Restore EDX
|
POP32R(gprR); // Restore EDX
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Scans the micro program stored at mVU->prog.prog[progIndex] and records in
// that program's sFlagHack field whether its status flag updates may be
// optimized out. Called by mVUcacheProg right after the program data is copied.
// The whole body is skipped when CHECK_VU_FLAGHACK is 0.
//   progIndex - index of the program inside mVU->prog.prog[].
microVUt(void) mVUcheckSflag(int progIndex) {
|
||||||
|
if (CHECK_VU_FLAGHACK) {
|
||||||
|

||||||
|
microVU* mVU = mVUx;
|
||||||
|
// Start optimistic: the FSAND / FSEQ / FSOR / FSSET handlers reset
// mVUsFlagHack to 0 inside their pass4 blocks.
mVUsFlagHack = 1;
|
||||||
|
// Walk the program two u32 words per step: data[i+1] is handed to the
// upper-op dispatcher and data[i] to the lower-op dispatcher, both
// instantiated with pass argument 3.
// NOTE(review): presumably pass argument 3 is what selects the handlers'
// pass4 blocks - confirm against the pass1..pass4 macro definitions.
for (u32 i = 0; i < mVU->progSize; i+=2) {
|
||||||
|
mVU->code = mVU->prog.prog[progIndex].data[i+1];
|
||||||
|
mVUopU<vuIndex, 3>();
|
||||||
|
mVU->code = mVU->prog.prog[progIndex].data[i];
|
||||||
|
mVUopL<vuIndex, 3>();
|
||||||
|
}
|
||||||
|
// The FSAND / FSEQ / FSOR pass4 blocks also OR bits into mVUflagInfo, so
// the scan can leave it dirty; clear it again before returning.
mVUflagInfo = 0;
|
||||||
|
// Store the verdict on the program (still 1 only if no handler reset it).
mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif //PCSX2_MICROVU
|
#endif //PCSX2_MICROVU
|
||||||
|
|
|
@ -35,6 +35,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
||||||
|
|
||||||
//SysPrintf("doStatus = %d; doMac = %d\n", doStatus>>9, doMac>>8);
|
//SysPrintf("doStatus = %d; doMac = %d\n", doStatus>>9, doMac>>8);
|
||||||
|
if (mVUflagHack) { mVUinfo &= ~_doStatus; }
|
||||||
if (!doFlags) return;
|
if (!doFlags) return;
|
||||||
if (!doMac) { regT1 = reg; }
|
if (!doMac) { regT1 = reg; }
|
||||||
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw
|
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw
|
||||||
|
|
Loading…
Reference in New Issue