mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Second try at a status flag optimization. Still needs tweaking but too tired xD git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1309 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d8fb66f143
commit
96f7911d26
|
@ -647,17 +647,44 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) {
|
|||
} \
|
||||
}
|
||||
|
||||
#define setBitSFLAG(bitTest, bitSet) { \
|
||||
TEST32ItoR(regT, bitTest); \
|
||||
pjmp = JZ8(0); \
|
||||
OR32ItoR(reg, bitSet); \
|
||||
x86SetJ8(pjmp); \
|
||||
}
|
||||
|
||||
#define setBitFSEQ(bitX) { \
|
||||
TEST32ItoR(gprT1, bitX); \
|
||||
pjmp = JZ8(0); \
|
||||
OR32ItoR(gprT1, bitX); \
|
||||
x86SetJ8(pjmp); \
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
|
||||
getFlagReg(fInstance, fInstance);
|
||||
MOVZX32R16toR(reg, fInstance);
|
||||
MOV32RtoR(reg, fInstance);
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
|
||||
getFlagReg(fInstance, fInstance);
|
||||
//AND32ItoR(reg, 0xffff);
|
||||
MOV32RtoR(fInstance, reg);
|
||||
}
|
||||
|
||||
// Normalize Status Flag
|
||||
microVUt(void) mVUallocSFLAGc(int reg, int regT, int fInstance) {
|
||||
u8 *pjmp;
|
||||
XOR32RtoR(reg, reg);
|
||||
mVUallocSFLAGa(regT, fInstance);
|
||||
setBitSFLAG(0x000f, 0x0001); // Z Bit
|
||||
setBitSFLAG(0x00f0, 0x0002); // S Bit
|
||||
setBitSFLAG(0x0f00, 0x0040); // ZS Bit
|
||||
setBitSFLAG(0xf000, 0x0080); // SS Bit
|
||||
AND32ItoR(regT, 0xffff0000); // DS/DI/OS/US/D/I/O/U Bits
|
||||
SHR32ItoR(regT, 14);
|
||||
OR32RtoR(reg, regT);
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) {
|
||||
MOVZX32M16toR(reg, (uptr)&mVU->macFlag[fInstance]);
|
||||
}
|
||||
|
|
|
@ -169,8 +169,8 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in
|
|||
|
||||
// Save Flag Instances
|
||||
if (!mVUflagHack) {
|
||||
getFlagReg(fStatus, fStatus);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
|
||||
mVUallocSFLAGc(gprT1, gprT2, fStatus);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1);
|
||||
}
|
||||
mVUallocMFLAGa(mVU, gprT1, fMac);
|
||||
mVUallocCFLAGa(mVU, gprT2, fClip);
|
||||
|
@ -196,13 +196,13 @@ microVUt(void) mVUtestCycles(mV) {
|
|||
iPC = mVUstartPC;
|
||||
mVUdebugNOW(0);
|
||||
SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
|
||||
u8* jmp8 = JG8(0);
|
||||
u32* jmp32 = JG32(0);
|
||||
MOV32ItoR(gprT2, xPC);
|
||||
if (!isVU1) CALLFunc((uptr)mVUwarning0);
|
||||
else CALLFunc((uptr)mVUwarning1);
|
||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
mVUendProgram(mVU, 0, 0, sI, 0, cI);
|
||||
x86SetJ8(jmp8);
|
||||
x86SetJ32(jmp32);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -378,7 +378,7 @@ eBitTemination:
|
|||
if (mVUinfo.doDivFlag) {
|
||||
int flagReg;
|
||||
getFlagReg(flagReg, lStatus);
|
||||
AND32ItoR (flagReg, 0x0fcf);
|
||||
AND32ItoR (flagReg, 0xfff3ffff);
|
||||
OR32MtoR (flagReg, (uptr)&mVU->divFlag);
|
||||
}
|
||||
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
|
||||
|
|
|
@ -42,10 +42,24 @@ microVUt(void) mVUdispatcherA(mV) {
|
|||
// Load Regs
|
||||
MOV32ItoR(gprR, Roffset); // Load VI Reg Offset
|
||||
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
|
||||
AND32ItoR(gprF0, 0xffff);
|
||||
|
||||
MOV32RtoR(gprF1, gprF0);
|
||||
SHL32ItoR(gprF1, 3);
|
||||
AND32ItoR(gprF1, 0x218);
|
||||
|
||||
MOV32RtoR(gprF2, gprF0);
|
||||
SHL32ItoR(gprF2, 5);
|
||||
AND32ItoR(gprF2, 0x1000);
|
||||
OR32RtoR (gprF1, gprF2);
|
||||
|
||||
MOV32RtoR(gprF3, gprF0);
|
||||
SHL32ItoR(gprF3, 14);
|
||||
AND32ItoR(gprF3, 0x3cf0000);
|
||||
OR32RtoR (gprF1, gprF3);
|
||||
|
||||
MOV32RtoR(gprF0, gprF1);
|
||||
MOV32RtoR(gprF2, gprF1);
|
||||
MOV32RtoR(gprF3, gprF1);
|
||||
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
|
||||
|
|
|
@ -24,7 +24,7 @@ microVUt(void) mVUdivSet(mV) {
|
|||
if (mVUinfo.doDivFlag) {
|
||||
getFlagReg(flagReg1, sFLAG.write);
|
||||
if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); }
|
||||
AND32ItoR(flagReg1, 0x0fcf);
|
||||
AND32ItoR(flagReg1, 0xfff3ffff);
|
||||
OR32MtoR (flagReg1, (uptr)&mVU->divFlag);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \
|
||||
TEST32ItoR(gprTemp, 1); /* Check sign bit */ \
|
||||
aJump = JZ8(0); /* Skip if positive */ \
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \
|
||||
MOV32ItoM((uptr)&mVU->divFlag, divI); /* Set Invalid Flags */ \
|
||||
SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \
|
||||
x86SetJ8(aJump); \
|
||||
}
|
||||
|
@ -54,10 +54,10 @@ mVUop(mVU_DIV) {
|
|||
|
||||
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
|
||||
ajmp = JZ8(0);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0)
|
||||
bjmp = JMP8(0);
|
||||
x86SetJ8(ajmp);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
|
||||
x86SetJ8(bjmp);
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
|
||||
|
@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) {
|
|||
|
||||
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
|
||||
bjmp = JZ8(0); // Skip if none are
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0)
|
||||
cjmp = JMP8(0);
|
||||
x86SetJ8(bjmp);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide flag (only when not 0/0)
|
||||
x86SetJ8(cjmp);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||
|
@ -522,7 +522,7 @@ mVUop(mVU_FMOR) {
|
|||
mVUop(mVU_FSAND) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
|
||||
AND16ItoR(gprT1, _Imm12_);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
|
@ -530,23 +530,10 @@ mVUop(mVU_FSAND) {
|
|||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||
}
|
||||
|
||||
mVUop(mVU_FSEQ) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
XOR16ItoR(gprT1, _Imm12_);
|
||||
SUB16ItoR(gprT1, 1);
|
||||
SHR16ItoR(gprT1, 15);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||
}
|
||||
|
||||
mVUop(mVU_FSOR) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
|
||||
OR16ItoR(gprT1, _Imm12_);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
|
@ -554,14 +541,54 @@ mVUop(mVU_FSOR) {
|
|||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||
}
|
||||
|
||||
mVUop(mVU_FSEQ) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
u8 *pjmp;
|
||||
int imm = 0;
|
||||
if (_Imm12_ & 0x0001) imm |= 0x000000f; // Z
|
||||
if (_Imm12_ & 0x0002) imm |= 0x00000f0; // S
|
||||
if (_Imm12_ & 0x0004) imm |= 0x0010000; // U
|
||||
if (_Imm12_ & 0x0008) imm |= 0x0020000; // O
|
||||
if (_Imm12_ & 0x0010) imm |= 0x0040000; // I
|
||||
if (_Imm12_ & 0x0020) imm |= 0x0080000; // D
|
||||
if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS
|
||||
if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS
|
||||
if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
|
||||
if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
|
||||
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
|
||||
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
|
||||
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
setBitFSEQ(0x000f); // Z bit
|
||||
setBitFSEQ(0x00f0); // S bit
|
||||
setBitFSEQ(0x0f00); // ZS bit
|
||||
setBitFSEQ(0xf000); // SS bit
|
||||
XOR32ItoR(gprT1, imm);
|
||||
SUB32ItoR(gprT1, 1);
|
||||
SHR32ItoR(gprT1, 31);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
||||
pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; }
|
||||
}
|
||||
|
||||
mVUop(mVU_FSSET) {
|
||||
pass1 { mVUanalyzeFSSET(mVU); }
|
||||
pass2 {
|
||||
int flagReg1, flagReg2;
|
||||
getFlagReg(flagReg1, sFLAG.write);
|
||||
if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction
|
||||
AND32ItoR(flagReg1, 0x03f);
|
||||
OR32ItoR (flagReg1, (_Imm12_ & 0xfc0));
|
||||
int sReg, imm = 0;
|
||||
if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS
|
||||
if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS
|
||||
if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
|
||||
if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
|
||||
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
|
||||
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
|
||||
getFlagReg(sReg, sFLAG.write);
|
||||
if (!(sFLAG.doFlag || mVUinfo.doDivFlag)) {
|
||||
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
||||
}
|
||||
AND32ItoR(sReg, 0xf00ff); // Keep Non-Sticky Bits
|
||||
if (imm) OR32ItoR(sReg, imm);
|
||||
}
|
||||
pass3 { mVUlog("FSSET $%x", _Imm12_); }
|
||||
pass4 { mVUsFlagHack = 0; }
|
||||
|
|
|
@ -105,6 +105,9 @@ declareAllVariables
|
|||
#define _Tbit_ (1<<27)
|
||||
#define _MDTbit_ 0 //( _Mbit_ | _Dbit_ | _Tbit_ ) // ToDo: Implement this stuff...
|
||||
|
||||
#define divI 0x1040000
|
||||
#define divD 0x2080000
|
||||
|
||||
#define isVU1 (mVU == µVU1)
|
||||
#define getIndex (isVU1 ? 1 : 0)
|
||||
#define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)
|
||||
|
|
|
@ -300,6 +300,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) {
|
|||
if (CHECK_VU_FLAGHACK1) {
|
||||
int bFlagInfo = mVUflagInfo;
|
||||
int bCode = mVU->code;
|
||||
int bFlagHack = mVUsFlagHack;
|
||||
mVUsFlagHack = 1;
|
||||
for (u32 i = 0; i < mVU->progSize; i+=2) {
|
||||
mVU->code = mVU->prog.prog[progIndex].data[i];
|
||||
|
@ -308,6 +309,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) {
|
|||
mVUflagInfo = bFlagInfo;
|
||||
mVU->code = bCode;
|
||||
mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack;
|
||||
mVUsFlagHack = bFlagHack;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
||||
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) {
|
||||
int sReg, mReg = gprT1;
|
||||
static u8 *pjmp, *pjmp2;
|
||||
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
||||
|
||||
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
||||
|
@ -40,7 +39,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
|||
if (sFLAG.doFlag) {
|
||||
getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance
|
||||
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
||||
AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
|
||||
AND32ItoR(sReg, 0xffffff00); // Keep Sticky and D/I flags
|
||||
}
|
||||
|
||||
//-------------------------Check for Signed flags------------------------------
|
||||
|
@ -54,25 +53,22 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
|||
SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg
|
||||
|
||||
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
|
||||
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
|
||||
if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW);
|
||||
if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags
|
||||
if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
|
||||
if (sFLAG.doFlag) x86SetJ8(pjmp);
|
||||
SHL32ItoR(mReg, 4 + ADD_XYZW);
|
||||
|
||||
//-------------------------Check for Zero flags------------------------------
|
||||
|
||||
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
|
||||
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
|
||||
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
|
||||
if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
|
||||
if (sFLAG.doFlag) x86SetJ8(pjmp);
|
||||
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); }
|
||||
OR32RtoR(mReg, gprT2);
|
||||
|
||||
//-------------------------Write back flags------------------------------
|
||||
|
||||
if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
|
||||
|
||||
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
|
||||
if (sFLAG.doFlag) {
|
||||
OR32RtoR (sReg, mReg);
|
||||
SHL32ItoR(mReg, 8);
|
||||
OR32RtoR (sReg, mReg);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue