- Second try at a status flag optimization. Still needs tweaking but too tired xD

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1309 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-06-03 09:22:25 +00:00
parent d8fb66f143
commit 96f7911d26
8 changed files with 116 additions and 47 deletions

View File

@ -647,17 +647,44 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) {
} \
}
#define setBitSFLAG(bitTest, bitSet) { \
TEST32ItoR(regT, bitTest); \
pjmp = JZ8(0); \
OR32ItoR(reg, bitSet); \
x86SetJ8(pjmp); \
}
#define setBitFSEQ(bitX) { \
TEST32ItoR(gprT1, bitX); \
pjmp = JZ8(0); \
OR32ItoR(gprT1, bitX); \
x86SetJ8(pjmp); \
}
microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
getFlagReg(fInstance, fInstance);
MOVZX32R16toR(reg, fInstance);
MOV32RtoR(reg, fInstance);
}
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
getFlagReg(fInstance, fInstance);
//AND32ItoR(reg, 0xffff);
MOV32RtoR(fInstance, reg);
}
// Normalize Status Flag
microVUt(void) mVUallocSFLAGc(int reg, int regT, int fInstance) {
u8 *pjmp;
XOR32RtoR(reg, reg);
mVUallocSFLAGa(regT, fInstance);
setBitSFLAG(0x000f, 0x0001); // Z Bit
setBitSFLAG(0x00f0, 0x0002); // S Bit
setBitSFLAG(0x0f00, 0x0040); // ZS Bit
setBitSFLAG(0xf000, 0x0080); // SS Bit
AND32ItoR(regT, 0xffff0000); // DS/DI/OS/US/D/I/O/U Bits
SHR32ItoR(regT, 14);
OR32RtoR(reg, regT);
}
microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) {
MOVZX32M16toR(reg, (uptr)&mVU->macFlag[fInstance]);
}

View File

@ -169,8 +169,8 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in
// Save Flag Instances
if (!mVUflagHack) {
getFlagReg(fStatus, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
mVUallocSFLAGc(gprT1, gprT2, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1);
}
mVUallocMFLAGa(mVU, gprT1, fMac);
mVUallocCFLAGa(mVU, gprT2, fClip);
@ -196,13 +196,13 @@ microVUt(void) mVUtestCycles(mV) {
iPC = mVUstartPC;
mVUdebugNOW(0);
SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
u8* jmp8 = JG8(0);
u32* jmp32 = JG32(0);
MOV32ItoR(gprT2, xPC);
if (!isVU1) CALLFunc((uptr)mVUwarning0);
else CALLFunc((uptr)mVUwarning1);
MOV32ItoR(gprR, Roffset); // Restore gprR
mVUendProgram(mVU, 0, 0, sI, 0, cI);
x86SetJ8(jmp8);
x86SetJ32(jmp32);
}
//------------------------------------------------------------------
@ -378,7 +378,7 @@ eBitTemination:
if (mVUinfo.doDivFlag) {
int flagReg;
getFlagReg(flagReg, lStatus);
AND32ItoR (flagReg, 0x0fcf);
AND32ItoR (flagReg, 0xfff3ffff);
OR32MtoR (flagReg, (uptr)&mVU->divFlag);
}
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }

View File

@ -42,10 +42,24 @@ microVUt(void) mVUdispatcherA(mV) {
// Load Regs
MOV32ItoR(gprR, Roffset); // Load VI Reg Offset
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
AND32ItoR(gprF0, 0xffff);
MOV32RtoR(gprF1, gprF0);
SHL32ItoR(gprF1, 3);
AND32ItoR(gprF1, 0x218);
MOV32RtoR(gprF2, gprF0);
SHL32ItoR(gprF2, 5);
AND32ItoR(gprF2, 0x1000);
OR32RtoR (gprF1, gprF2);
MOV32RtoR(gprF3, gprF0);
SHL32ItoR(gprF3, 14);
AND32ItoR(gprF3, 0x3cf0000);
OR32RtoR (gprF1, gprF3);
MOV32RtoR(gprF0, gprF1);
MOV32RtoR(gprF2, gprF1);
MOV32RtoR(gprF3, gprF1);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL);
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);

View File

@ -24,7 +24,7 @@ microVUt(void) mVUdivSet(mV) {
if (mVUinfo.doDivFlag) {
getFlagReg(flagReg1, sFLAG.write);
if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); }
AND32ItoR(flagReg1, 0x0fcf);
AND32ItoR(flagReg1, 0xfff3ffff);
OR32MtoR (flagReg1, (uptr)&mVU->divFlag);
}
}

View File

@ -37,7 +37,7 @@
SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \
TEST32ItoR(gprTemp, 1); /* Check sign bit */ \
aJump = JZ8(0); /* Skip if positive */ \
MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \
MOV32ItoM((uptr)&mVU->divFlag, divI); /* Set Invalid Flags */ \
SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \
x86SetJ8(aJump); \
}
@ -54,10 +54,10 @@ mVUop(mVU_DIV) {
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
ajmp = JZ8(0);
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0)
bjmp = JMP8(0);
x86SetJ8(ajmp);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0)
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
x86SetJ8(bjmp);
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) {
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
bjmp = JZ8(0); // Skip if none are
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0)
cjmp = JMP8(0);
x86SetJ8(bjmp);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0)
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide flag (only when not 0/0)
x86SetJ8(cjmp);
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
@ -522,7 +522,7 @@ mVUop(mVU_FMOR) {
mVUop(mVU_FSAND) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
AND16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
}
@ -530,23 +530,10 @@ mVUop(mVU_FSAND) {
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
}
mVUop(mVU_FSEQ) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
XOR16ItoR(gprT1, _Imm12_);
SUB16ItoR(gprT1, 1);
SHR16ItoR(gprT1, 15);
mVUallocVIb(mVU, gprT1, _It_);
}
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
}
mVUop(mVU_FSOR) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
OR16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
}
@ -554,14 +541,54 @@ mVUop(mVU_FSOR) {
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
}
mVUop(mVU_FSEQ) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
u8 *pjmp;
int imm = 0;
if (_Imm12_ & 0x0001) imm |= 0x000000f; // Z
if (_Imm12_ & 0x0002) imm |= 0x00000f0; // S
if (_Imm12_ & 0x0004) imm |= 0x0010000; // U
if (_Imm12_ & 0x0008) imm |= 0x0020000; // O
if (_Imm12_ & 0x0010) imm |= 0x0040000; // I
if (_Imm12_ & 0x0020) imm |= 0x0080000; // D
if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS
if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS
if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
mVUallocSFLAGa(gprT1, sFLAG.read);
setBitFSEQ(0x000f); // Z bit
setBitFSEQ(0x00f0); // S bit
setBitFSEQ(0x0f00); // ZS bit
setBitFSEQ(0xf000); // SS bit
XOR32ItoR(gprT1, imm);
SUB32ItoR(gprT1, 1);
SHR32ItoR(gprT1, 31);
mVUallocVIb(mVU, gprT1, _It_);
}
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
}
mVUop(mVU_FSSET) {
pass1 { mVUanalyzeFSSET(mVU); }
pass2 {
int flagReg1, flagReg2;
getFlagReg(flagReg1, sFLAG.write);
if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction
AND32ItoR(flagReg1, 0x03f);
OR32ItoR (flagReg1, (_Imm12_ & 0xfc0));
int sReg, imm = 0;
if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS
if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS
if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
getFlagReg(sReg, sFLAG.write);
if (!(sFLAG.doFlag || mVUinfo.doDivFlag)) {
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
}
AND32ItoR(sReg, 0xf00ff); // Keep Non-Sticky Bits
if (imm) OR32ItoR(sReg, imm);
}
pass3 { mVUlog("FSSET $%x", _Imm12_); }
pass4 { mVUsFlagHack = 0; }

View File

@ -105,6 +105,9 @@ declareAllVariables
#define _Tbit_ (1<<27)
#define _MDTbit_ 0 //( _Mbit_ | _Dbit_ | _Tbit_ ) // ToDo: Implement this stuff...
#define divI 0x1040000
#define divD 0x2080000
#define isVU1 (mVU == &microVU1)
#define getIndex (isVU1 ? 1 : 0)
#define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)

View File

@ -300,6 +300,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) {
if (CHECK_VU_FLAGHACK1) {
int bFlagInfo = mVUflagInfo;
int bCode = mVU->code;
int bFlagHack = mVUsFlagHack;
mVUsFlagHack = 1;
for (u32 i = 0; i < mVU->progSize; i+=2) {
mVU->code = mVU->prog.prog[progIndex].data[i];
@ -308,6 +309,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) {
mVUflagInfo = bFlagInfo;
mVU->code = bCode;
mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack;
mVUsFlagHack = bFlagHack;
}
}

View File

@ -29,7 +29,6 @@
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) {
int sReg, mReg = gprT1;
static u8 *pjmp, *pjmp2;
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
@ -40,7 +39,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
if (sFLAG.doFlag) {
getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
AND32ItoR(sReg, 0xffffff00); // Keep Sticky and D/I flags
}
//-------------------------Check for Signed flags------------------------------
@ -54,25 +53,22 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW);
if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags
if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
if (sFLAG.doFlag) x86SetJ8(pjmp);
SHL32ItoR(mReg, 4 + ADD_XYZW);
//-------------------------Check for Zero flags------------------------------
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
if (sFLAG.doFlag) x86SetJ8(pjmp);
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); }
OR32RtoR(mReg, gprT2);
//-------------------------Write back flags------------------------------
if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
if (sFLAG.doFlag) {
OR32RtoR (sReg, mReg);
SHL32ItoR(mReg, 8);
OR32RtoR (sReg, mReg);
}
}
//------------------------------------------------------------------