From b96ab5621f40459049f09b7a3211573ff56d22b0 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sat, 24 Oct 2009 23:33:11 +0000 Subject: [PATCH] Implemented Aligned Stack for microVU and superVU (mVUs is currently ifdef'd for GCC only, since implementing aligned stack for other compilers that don't automatically assume it requires some complexity and overhead). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2072 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 7 +++++ pcsx2/x86/microVU.h | 4 --- pcsx2/x86/microVU_Branch.inl | 5 ---- pcsx2/x86/microVU_Compile.inl | 2 -- pcsx2/x86/microVU_Execute.inl | 51 +++++++++++++++++++++-------------- pcsx2/x86/microVU_Lower.inl | 9 +------ pcsx2/x86/microVU_Macro.inl | 2 -- pcsx2/x86/microVU_Misc.inl | 1 - pcsx2/x86/sVU_Lower.cpp | 2 +- pcsx2/x86/sVU_Micro.h | 2 +- pcsx2/x86/sVU_zerorec.cpp | 17 +++++------- 11 files changed, 47 insertions(+), 55 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 5cc24d5a49..58a247a31a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -16,6 +16,13 @@ // Micro VU recompiler! 
- author: cottonvibes(@gmail.com) #include "PrecompiledHeader.h" +#include "Common.h" +#include "VU.h" +#include "GS.h" +#include "x86emitter/x86emitter.h" + +using namespace x86Emitter; + #include "microVU.h" //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 1d60332b53..bdbe2e36dd 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -17,10 +17,6 @@ //#define mVUdebug // Prints Extra Info to Console //#define mVUlogProg // Dumps MicroPrograms to \logs\*.html -#include "Common.h" -#include "VU.h" -#include "GS.h" -#include "x86emitter/x86emitter.h" #include "microVU_IR.h" #include "microVU_Misc.h" diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index ff13212764..6df70a9154 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -90,7 +90,6 @@ microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) { } void normBranchCompile(microVU* mVU, u32 branchPC) { - using namespace x86Emitter; microBlock* pBlock; blockCreate(branchPC/8); pBlock = mVUblocks[branchPC/8]->search((microRegInfo*)&mVUregs); @@ -99,7 +98,6 @@ void normBranchCompile(microVU* mVU, u32 branchPC) { } void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) { - using namespace x86Emitter; memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); mVUsetupBranch(mVU, mFC); mVUbackupRegs(mVU); @@ -126,7 +124,6 @@ void normBranch(mV, microFlagCycles& mFC) { } void condBranch(mV, microFlagCycles& mFC, int JMPcc) { - using namespace x86Emitter; mVUsetupBranch(mVU, mFC); xCMP(ptr16[&mVU->branch], 0); incPC(3); @@ -172,8 +169,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { } void normJump(mV, microFlagCycles& mFC) { - using namespace x86Emitter; - if (mVUlow.constJump.isValid) { // Jump Address is Constant if (mVUup.eBit) { // E-bit Jump iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1); diff --git a/pcsx2/x86/microVU_Compile.inl 
b/pcsx2/x86/microVU_Compile.inl index 5e19caf816..82567512d1 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -333,7 +333,6 @@ microVUt(void) mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) { microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { - using namespace x86Emitter; microFlagCycles mFC; u8* thisPtr = x86Ptr; const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU->microMemSize / 8); @@ -414,7 +413,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { // Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr) microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) { - using namespace x86Emitter; if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); } startPC &= mVU->microMemSize-8; diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index beff78c046..b4026f0ba0 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -23,18 +23,23 @@ void mVUdispatcherA(mV) { mVU->startFunct = x86Ptr; - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
- if (!isVU1) { CALLFunc((uptr)mVUexecuteVU0); } - else { CALLFunc((uptr)mVUexecuteVU1); } - // Backup cpu state - PUSH32R(EBX); - PUSH32R(EBP); - PUSH32R(ESI); - PUSH32R(EDI); + xPUSH(ebp); + xPUSH(ebx); + xPUSH(esi); + xPUSH(edi); + + // Align the stackframe (GCC only, since GCC assumes stackframe is always aligned) + #ifdef __GNUC__ + xSUB(esp, 12); + #endif + + // __fastcall = The caller has already put the needed parameters in ecx/edx: + if (!isVU1) { xCALL(mVUexecuteVU0); } + else { xCALL(mVUexecuteVU1); } // Load VU's MXCSR state - SSE_LDMXCSR((uptr)&g_sseVUMXCSR); + xLDMXCSR(&g_sseVUMXCSR); // Load Regs #ifdef CHECK_MACROVU0 @@ -59,7 +64,7 @@ void mVUdispatcherA(mV) { SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ // Jump to Recompiled Code Block - JMPR(EAX); + xJMP(eax); } // Generates the code to exit from recompiled blocks @@ -67,19 +72,25 @@ void mVUdispatcherB(mV) { mVU->exitFunct = x86Ptr; // Load EE's MXCSR state - SSE_LDMXCSR((uptr)&g_sseMXCSR); + xLDMXCSR(&g_sseMXCSR); - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. - if (!isVU1) { CALLFunc((uptr)mVUcleanUpVU0); } - else { CALLFunc((uptr)mVUcleanUpVU1); } + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; + // all other arguments are passed right to left. 
+ if (!isVU1) { xCALL(mVUcleanUpVU0); } + else { xCALL(mVUcleanUpVU1); } + + // Unalign the stackframe: + #ifdef __GNUC__ + xADD( esp, 12 ); + #endif // Restore cpu state - POP32R(EDI); - POP32R(ESI); - POP32R(EBP); - POP32R(EBX); + xPOP(edi); + xPOP(esi); + xPOP(ebx); + xPOP(ebp); - RET(); + xRET(); mVUcacheCheck(x86Ptr, mVU->cache, 0x1000); } @@ -98,7 +109,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { mVU->cycles = cycles; mVU->totalCycles = cycles; - x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off + xSetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 5bbd2845bf..b6b9e89203 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1128,7 +1128,7 @@ microVUt(void) mVU_XGKICK_DELAY(mV, bool memVI) { mVUbackupRegs(mVU); if (memVI) MOV32MtoR(gprT2, (uptr)&mVU->VIxgkick); else mVUallocVIa(mVU, gprT2, _Is_); - CALLFunc((uptr)mVU_XGKICK_); + xCALL(mVU_XGKICK_); mVUrestoreRegs(mVU); } @@ -1159,7 +1159,6 @@ void setBranchA(mP, int x, int _x_) { } void condEvilBranch(mV, int JMPcc) { - using namespace x86Emitter; if (mVUlow.badBranch) { xMOV(ptr32[&mVU->branch], eax); xMOV(ptr32[&mVU->badBranch], branchAddrN); @@ -1202,7 +1201,6 @@ mVUop(mVU_BAL) { } mVUop(mVU_IBEQ) { - using namespace x86Emitter; setBranchA(mX, 3, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } pass2 { @@ -1219,7 +1217,6 @@ mVUop(mVU_IBEQ) { } mVUop(mVU_IBGEZ) { - using namespace x86Emitter; setBranchA(mX, 4, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { @@ -1232,7 +1229,6 @@ mVUop(mVU_IBGEZ) { } mVUop(mVU_IBGTZ) { - using namespace x86Emitter; setBranchA(mX, 5, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { @@ -1245,7 +1241,6 @@ mVUop(mVU_IBGTZ) { } mVUop(mVU_IBLEZ) { - using namespace x86Emitter; setBranchA(mX, 6, 0); pass1 { 
mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { @@ -1258,7 +1253,6 @@ mVUop(mVU_IBLEZ) { } mVUop(mVU_IBLTZ) { - using namespace x86Emitter; setBranchA(mX, 7, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } pass2 { @@ -1271,7 +1265,6 @@ mVUop(mVU_IBLTZ) { } mVUop(mVU_IBNE) { - using namespace x86Emitter; setBranchA(mX, 8, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } pass2 { diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index 94aced4efe..fba798abd9 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -19,8 +19,6 @@ #include "iR5900.h" #include "R5900OpcodeTables.h" -using namespace x86Emitter; - extern void _vu0WaitMicro(); extern void _vu0FinishMicro(); diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 62a868d8e7..0fc5874672 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -520,7 +520,6 @@ static __pagealigned u8 mVUsearchXMM[0x1000]; // Generates a custom optimized block-search function // Note: Structs must be 16-byte aligned! 
(GCC doesn't guarantee this) void mVUcustomSearch() { - using namespace x86Emitter; HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false); memset_8<0xcc,0x1000>(mVUsearchXMM); xSetPtr(mVUsearchXMM); diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp index f51c6f9fb7..1d44f35fba 100644 --- a/pcsx2/x86/sVU_Lower.cpp +++ b/pcsx2/x86/sVU_Lower.cpp @@ -1969,7 +1969,7 @@ void recVUMI_XTOP( VURegs *VU, int info ) //------------------------------------------------------------------ // VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp //------------------------------------------------------------------ -void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr) +void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr) { u32 size; u8* data = ((u8*)pMem + (addr&0x3fff)); diff --git a/pcsx2/x86/sVU_Micro.h b/pcsx2/x86/sVU_Micro.h index 10e29b9894..9d7415f380 100644 --- a/pcsx2/x86/sVU_Micro.h +++ b/pcsx2/x86/sVU_Micro.h @@ -61,7 +61,7 @@ struct _vuopinfo { void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs); int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices -void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK +void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK extern int vucycle; typedef void (*vFloat)(int regd, int regTemp); diff --git a/pcsx2/x86/sVU_zerorec.cpp b/pcsx2/x86/sVU_zerorec.cpp index 2b6f6c3725..88b7817099 100644 --- a/pcsx2/x86/sVU_zerorec.cpp +++ b/pcsx2/x86/sVU_zerorec.cpp @@ -2550,8 +2550,6 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) svutime += (u32)(svufinal.QuadPart - svubase.QuadPart); #endif - assert(s_vu1esp == 0); - VU = vuindex ? 
&VU1 : &VU0; VU->cycle += s_TotalVUCycles; @@ -2601,9 +2599,8 @@ __declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex) mov s_vuedi, edi mov s_vuebx, ebx -#ifdef PCSX2_DEBUG mov s_vu1esp, esp -#endif + and esp, -16 // align stack for GCC compliance //stmxcsr s_ssecsr ldmxcsr g_sseVUMXCSR @@ -2629,9 +2626,7 @@ __declspec(naked) static void SuperVUEndProgram() mov edi, s_vuedi mov ebx, s_vuebx -#ifdef PCSX2_DEBUG - sub s_vu1esp, esp -#endif + mov esp, s_vu1esp // restore from aligned stack call SuperVUCleanupProgram jmp s_callstack // so returns correctly @@ -4337,11 +4332,11 @@ void recVUMI_XGKICK_(VURegs *VU) _freeXMMregs(); OR32ItoM((uptr)&psHu32(GIF_STAT), (GIF_STAT_APATH1 | GIF_STAT_OPH)); // Set PATH1 GIF Status Flags - PUSH32R(s_XGKICKReg); - PUSH32I((uptr)VU->Mem); - CALLFunc((uptr)VU1XGKICK_MTGSTransfer); - ADD32ItoR(ESP, 8); + xMOV(edx, xRegister32(s_XGKICKReg)); + xMOV(ecx, (uptr)VU->Mem); + xCALL(VU1XGKICK_MTGSTransfer); + AND32ItoM((uptr)&psHu32(GIF_STAT), ~(GIF_STAT_APATH1 | GIF_STAT_OPH)); // Clear PATH1 GIF Status Flags s_ScheduleXGKICK = 0; }