From ebcedccf23446f3679d41b3083bd65e8423c1d33 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 4 Apr 2009 06:20:48 +0000 Subject: [PATCH] pcsx2: - removed some obsolete 'iCWstate' code. microVU: - implemented more rec first pass stuff for the lower instructions. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@903 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iCore.h | 2 +- pcsx2/x86/iFPU.cpp | 33 ------------------------- pcsx2/x86/iFPU.h | 3 --- pcsx2/x86/iFPUd.cpp | 35 +-------------------------- pcsx2/x86/iR5900.h | 4 --- pcsx2/x86/iVU0micro.cpp | 3 +-- pcsx2/x86/iVU1micro.cpp | 1 - pcsx2/x86/ix86-32/iCore-32.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 8 +----- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 2 -- pcsx2/x86/microVU_Alloc.h | 6 +++-- pcsx2/x86/microVU_Analyze.inl | 26 +++++++++++++++++--- pcsx2/x86/microVU_Compile.inl | 4 ++- pcsx2/x86/microVU_Lower.inl | 32 ++++++++++++------------ pcsx2/x86/microVU_Misc.h | 2 +- 15 files changed, 52 insertions(+), 111 deletions(-) diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 8191ddcfc2..3397805068 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -362,7 +362,7 @@ void _recMove128MtoRmOffset(u32 offset, u32 from); extern int _signExtendGPRtoMMX(x86MMXRegType to, u32 gprreg, int shift); extern _mmxregs mmxregs[MMXREGS], s_saveMMXregs[MMXREGS]; -extern u16 x86FpuState, iCWstate; +extern u16 x86FpuState; extern void iDumpRegisters(u32 startpc, u32 temp); diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 601159d52c..27dfd1471f 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -23,39 +23,6 @@ #include "iR5900.h" #include "iFPU.h" -//------------------------------------------------------------------ -// Misc... -//------------------------------------------------------------------ -//static u32 _mxcsr = 0x7F80; -//static u32 _mxcsrs; -static u32 fpucw = 0x007f; -static u32 fpucws = 0; - -void SaveCW(int type) { - if (iCWstate & type) return; - - if (type == 2) { -// SSE_STMXCSR((uptr)&_mxcsrs); -// SSE_LDMXCSR((uptr)&_mxcsr); - } else { - FNSTCW( (uptr)&fpucws ); - FLDCW( (uptr)&fpucw ); - } - iCWstate|= type; -} - -void LoadCW() { - if (iCWstate == 0) return; - - if (iCWstate & 2) { - //SSE_LDMXCSR((uptr)&_mxcsrs); - } - if (iCWstate & 1) { - FLDCW( (uptr)&fpucws ); - } - iCWstate = 0; -} - //------------------------------------------------------------------ namespace R5900 { namespace Dynarec { diff --git a/pcsx2/x86/iFPU.h b/pcsx2/x86/iFPU.h index d6b566999f..f813612eaa 100644 --- a/pcsx2/x86/iFPU.h +++ b/pcsx2/x86/iFPU.h @@ -22,9 +22,6 @@ namespace R5900 { namespace Dynarec { - void SaveCW(); - void LoadCW(); - namespace OpcodeImpl { namespace COP1 { diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index 7f6221f08c..ec7b489b76 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -39,40 +39,7 @@ //set I&D flags. also impacts other aspects of DIV/R/SQRT correctness #define FPU_FLAGS_ID 1 - -//------------------------------------------------------------------ -// Misc... -//------------------------------------------------------------------ -//static u32 _mxcsr = 0x7F80; -//static u32 _mxcsrs; -/*static u32 fpucw = 0x007f; -static u32 fpucws = 0; - -void SaveCW(int type) { - if (iCWstate & type) return; - - if (type == 2) { -// SSE_STMXCSR((uptr)&_mxcsrs); -// SSE_LDMXCSR((uptr)&_mxcsr); - } else { - FNSTCW( (uptr)&fpucws ); - FLDCW( (uptr)&fpucw ); - } - iCWstate|= type; -} - -void LoadCW() { - if (iCWstate == 0) return; - - if (iCWstate & 2) { - //SSE_LDMXCSR((uptr)&_mxcsrs); - } - if (iCWstate & 1) { - FLDCW( (uptr)&fpucws ); - } - iCWstate = 0; -} -*/ + #ifdef FPU_RECOMPILE //------------------------------------------------------------------ diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index ff2406db19..15fc5ef33b 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -117,10 +117,6 @@ void iFlushCall(int flushtype); void recBranchCall( void (*func)() ); void recCall( void (*func)(), int delreg ); -// these are defined in iFPU.cpp -void LoadCW(); -void SaveCW(int type); - extern void recExecute(); // same as recCpu.Execute(), but faster (can be inline'd) namespace R5900{ diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index b3c9887908..4b70381294 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -44,9 +44,8 @@ namespace VU0micro { SuperVUReset(0); - // these shouldn't be needed, but shouldn't hurt anything either. + // this shouldn't be needed, but shouldn't hurt anything either. x86FpuState = FPU_STATE; - iCWstate = 0; } static void recStep() diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index c56eae133c..2c4cc00bb4 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -85,7 +85,6 @@ namespace VU1micro // these shouldn't be needed, but shouldn't hurt anything either. x86FpuState = FPU_STATE; - iCWstate = 0; } static void recStep() diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index d3557228e0..0a70906ba8 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -33,7 +33,7 @@ using namespace std; extern u32 g_psxConstRegs[32]; -u16 x86FpuState, iCWstate; +u16 x86FpuState; u16 g_mmxAllocCounter = 0; // X86 caching diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index b38fa22580..06f3b62b92 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -92,7 +92,7 @@ static u32 s_nHasDelay = 0; // save states for branches GPR_reg64 s_saveConstRegs[32]; -static u16 s_savex86FpuState, s_saveiCWstate; +static u16 s_savex86FpuState; static u32 s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0; static EEINST* s_psaveInstInfo = NULL; @@ -587,7 +587,6 @@ void recResetEE( void ) recPtr = recMem; recStackPtr = recStack; x86FpuState = FPU_STATE; - iCWstate = 0; branch = 0; SetCPUState(Config.sseMXCSR, Config.sseVUMXCSR); @@ -987,7 +986,6 @@ void SetBranchImm( u32 imm ) void SaveBranchState() { s_savex86FpuState = x86FpuState; - s_saveiCWstate = iCWstate; s_savenBlockCycles = s_nBlockCycles; memcpy(s_saveConstRegs, g_cpuConstRegs, sizeof(g_cpuConstRegs)); s_saveHasConstReg = g_cpuHasConstReg; @@ -1004,7 +1002,6 @@ void SaveBranchState() void LoadBranchState() { x86FpuState = s_savex86FpuState; - iCWstate = s_saveiCWstate; s_nBlockCycles = s_savenBlockCycles; memcpy(g_cpuConstRegs, s_saveConstRegs, sizeof(g_cpuConstRegs)); @@ -1036,8 +1033,6 @@ void iFlushCall(int flushtype) if( flushtype & FLUSH_CACHED_REGS ) _flushConstRegs(); - LoadCW(); - if (x86FpuState==MMX_STATE) { if (cpucaps.has3DNOWInstructionExtensions) FEMMS(); else EMMS(); @@ -1406,7 +1401,6 @@ void recRecompile( const u32 startpc ) s_nBlockCycles = 0; pc = startpc; x86FpuState = FPU_STATE; - iCWstate = 0; g_cpuHasConstReg = g_cpuFlushedConstReg = 1; g_cpuPrevRegHasLive1 = g_cpuRegHasLive1 = 0xffffffff; g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt = 0; diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 37ee17bcfa..83925362c3 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -230,8 +230,6 @@ int recSetMemLocation(int regs, int imm, int mmreg, int msize, int j32) if ( imm != 0 ) ADD32ItoR( ECX, imm ); - LoadCW(); - #ifdef _DEBUG //CALLFunc((uptr)testaddrs); #endif diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 4361ea0e74..c539eb1297 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -40,6 +40,8 @@ struct microTempRegInfo { u8 VFreg[2]; // Index of the VF reg u8 VI; // Holds cycle info for Id u8 VIreg; // Index of the VI reg + u8 q; // Holds cycle info for Q reg + u8 p; // Holds cycle info for P reg }; template @@ -49,9 +51,9 @@ struct microAllocInfo { u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time. - u32 curPC; // Current PC + u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block - u32 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) + u32 curPC; // Current PC u32 info[pSize]; // bit 00 = Lower Instruction is NOP // bit 01 // bit 02 diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 23050cd585..f35299ab28 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -27,8 +27,8 @@ // FMAC1 - Normal FMAC Opcodes //------------------------------------------------------------------ -#define aReg(x) mVUallocInfo.regs.VF[x] -#define bReg(x) mVUallocInfo.regsTemp.VFreg[0] = x; mVUallocInfo.regsTemp.VF[0] +#define aReg(x) mVUregs.VF[x] +#define bReg(x) mVUregsTemp.VFreg[0] = x; mVUregsTemp.VF[0] #define aMax(x, y) ((x > y) ? x : y) #define analyzeReg1(reg) { \ @@ -117,10 +117,30 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { } \ } -microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf) { +#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = aMax(mVUstall, mVUregs.q); } +#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); } + +microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVU* mVU = mVUx; analyzeReg5(Fs, Fsf); analyzeReg5(Ft, Ftf); + analyzeQreg(xCycles); +} + +//------------------------------------------------------------------ +// EFU - EFU Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeEFU1(int Fs, int Fsf, u8 xCycles) { + microVU* mVU = mVUx; + analyzeReg5(Fs, Fsf); + analyzePreg(xCycles); +} + +microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { + microVU* mVU = mVUx; + analyzeReg1(Fs); + analyzePreg(xCycles); } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index e07dff38e8..49c517de5e 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -50,8 +50,10 @@ microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; - mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg; + mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; + mVUregs.q = mVUregsTemp.q; + mVUregs.p = mVUregsTemp.p; } microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 7d6d83a51b..8e7859e033 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -41,7 +41,7 @@ microVUf(void) mVU_DIV() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 7); } else { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); @@ -76,7 +76,7 @@ microVUf(void) mVU_DIV() { microVUf(void) mVU_SQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_, 7); } else { u8 *ajmp; getReg5(xmmFt, _Ft_, _Ftf_); @@ -93,7 +93,7 @@ microVUf(void) mVU_SQRT() { microVUf(void) mVU_RSQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 13); } else { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); @@ -158,7 +158,7 @@ microVUt(void) mVU_EATAN_() { microVUf(void) mVU_EATAN() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 54); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -174,7 +174,7 @@ microVUf(void) mVU_EATAN() { microVUf(void) mVU_EATANxy() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } else { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); @@ -191,7 +191,7 @@ microVUf(void) mVU_EATANxy() { microVUf(void) mVU_EATANxz() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } else { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); @@ -215,7 +215,7 @@ microVUf(void) mVU_EATANxz() { microVUf(void) mVU_EEXP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 44); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -263,7 +263,7 @@ microVUt(void) mVU_sumXYZ() { microVUf(void) mVU_ELENG() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -275,7 +275,7 @@ microVUf(void) mVU_ELENG() { microVUf(void) mVU_ERCPR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -289,7 +289,7 @@ microVUf(void) mVU_ERCPR() { microVUf(void) mVU_ERLENG() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 24); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -304,7 +304,7 @@ microVUf(void) mVU_ERLENG() { microVUf(void) mVU_ERSADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -319,7 +319,7 @@ microVUf(void) mVU_ERSADD() { microVUf(void) mVU_ERSQRT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 18); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -333,7 +333,7 @@ microVUf(void) mVU_ERSQRT() { microVUf(void) mVU_ESADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 11); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -351,7 +351,7 @@ microVUf(void) mVU_ESADD() { microVUf(void) mVU_ESIN() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 29); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -377,7 +377,7 @@ microVUf(void) mVU_ESIN() { microVUf(void) mVU_ESQRT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -388,7 +388,7 @@ microVUf(void) mVU_ESQRT() { microVUf(void) mVU_ESUM() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 12); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 28231ca34e..ce9272f201 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -99,7 +99,7 @@ declareAllVariables #define _Mbit_ (1<<29) #define _Dbit_ (1<<28) #define _Tbit_ (1<<27) -#define _MDTbit_ ( _Mbit_ | _Dbit_ | _Tbit_ ) +#define _MDTbit_ 0 //( _Mbit_ | _Dbit_ | _Tbit_ ) // ToDo: Implement this stuff... #define getVUmem(x) (((vuIndex == 1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) #define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))