Implemented aligned stack for microVU and superVU (microVU's is currently ifdef'd for GCC only, since implementing an aligned stack for other compilers that don't automatically assume one requires some complexity and overhead).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2072 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-10-24 23:33:11 +00:00
parent 984e79ae03
commit b96ab5621f
11 changed files with 47 additions and 55 deletions

---- changed file ----

@@ -16,6 +16,13 @@
 // Micro VU recompiler! - author: cottonvibes(@gmail.com)
 #include "PrecompiledHeader.h"
+#include "Common.h"
+#include "VU.h"
+#include "GS.h"
+#include "x86emitter/x86emitter.h"
+using namespace x86Emitter;
 #include "microVU.h"
 //------------------------------------------------------------------

---- changed file ----

@@ -17,10 +17,6 @@
 //#define mVUdebug // Prints Extra Info to Console
 //#define mVUlogProg // Dumps MicroPrograms to \logs\*.html
-#include "Common.h"
-#include "VU.h"
-#include "GS.h"
-#include "x86emitter/x86emitter.h"
 #include "microVU_IR.h"
 #include "microVU_Misc.h"

---- changed file ----

@@ -90,7 +90,6 @@ microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) {
 }
 void normBranchCompile(microVU* mVU, u32 branchPC) {
-	using namespace x86Emitter;
 	microBlock* pBlock;
 	blockCreate(branchPC/8);
 	pBlock = mVUblocks[branchPC/8]->search((microRegInfo*)&mVUregs);
@@ -99,7 +98,6 @@ void normBranchCompile(microVU* mVU, u32 branchPC) {
 }
 void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
-	using namespace x86Emitter;
 	memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
 	mVUsetupBranch(mVU, mFC);
 	mVUbackupRegs(mVU);
@@ -126,7 +124,6 @@ void normBranch(mV, microFlagCycles& mFC) {
 }
 void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
-	using namespace x86Emitter;
 	mVUsetupBranch(mVU, mFC);
 	xCMP(ptr16[&mVU->branch], 0);
 	incPC(3);
@@ -172,8 +169,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
 }
 void normJump(mV, microFlagCycles& mFC) {
-	using namespace x86Emitter;
 	if (mVUlow.constJump.isValid) { // Jump Address is Constant
 		if (mVUup.eBit) { // E-bit Jump
 			iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);

---- changed file ----

@@ -333,7 +333,6 @@ microVUt(void) mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) {
 microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
-	using namespace x86Emitter;
 	microFlagCycles mFC;
 	u8* thisPtr = x86Ptr;
 	const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU->microMemSize / 8);
@@ -414,7 +413,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
 // Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr)
 microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) {
-	using namespace x86Emitter;
 	if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); }
 	startPC &= mVU->microMemSize-8;

---- changed file ----

@@ -23,18 +23,23 @@
 void mVUdispatcherA(mV) {
 	mVU->startFunct = x86Ptr;
-	// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
-	if (!isVU1) { CALLFunc((uptr)mVUexecuteVU0); }
-	else        { CALLFunc((uptr)mVUexecuteVU1); }
 	// Backup cpu state
-	PUSH32R(EBX);
-	PUSH32R(EBP);
-	PUSH32R(ESI);
-	PUSH32R(EDI);
+	xPUSH(ebp);
+	xPUSH(ebx);
+	xPUSH(esi);
+	xPUSH(edi);
+
+	// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
+	#ifdef __GNUC__
+	xSUB(esp, 12);
+	#endif
+
+	// __fastcall = The caller has already put the needed parameters in ecx/edx:
+	if (!isVU1) { xCALL(mVUexecuteVU0); }
+	else        { xCALL(mVUexecuteVU1); }
 	// Load VU's MXCSR state
-	SSE_LDMXCSR((uptr)&g_sseVUMXCSR);
+	xLDMXCSR(&g_sseVUMXCSR);
 	// Load Regs
 	#ifdef CHECK_MACROVU0
@@ -59,7 +64,7 @@ void mVUdispatcherA(mV) {
 	SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
 	// Jump to Recompiled Code Block
-	JMPR(EAX);
+	xJMP(eax);
 }
 // Generates the code to exit from recompiled blocks
@@ -67,19 +72,25 @@ void mVUdispatcherB(mV) {
 	mVU->exitFunct = x86Ptr;
 	// Load EE's MXCSR state
-	SSE_LDMXCSR((uptr)&g_sseMXCSR);
+	xLDMXCSR(&g_sseMXCSR);
-	// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
-	if (!isVU1) { CALLFunc((uptr)mVUcleanUpVU0); }
-	else        { CALLFunc((uptr)mVUcleanUpVU1); }
+	// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
+	// all other arguments are passed right to left.
+	if (!isVU1) { xCALL(mVUcleanUpVU0); }
+	else        { xCALL(mVUcleanUpVU1); }
+
+	// Unalign the stackframe:
+	#ifdef __GNUC__
+	xADD( esp, 12 );
+	#endif
 	// Restore cpu state
-	POP32R(EDI);
-	POP32R(ESI);
-	POP32R(EBP);
-	POP32R(EBX);
-	RET();
+	xPOP(edi);
+	xPOP(esi);
+	xPOP(ebx);
+	xPOP(ebp);
+	xRET();
 	mVUcacheCheck(x86Ptr, mVU->cache, 0x1000);
 }
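
Editorial aside (not part of the commit): the "12" in the new prologue/epilogue pair falls out of simple arithmetic. GCC-compiled callees such as mVUexecuteVU0/VU1 assume ESP was 16-byte aligned at the call instruction, so on entry to the dispatcher ESP % 16 == 12 (the caller's CALL pushed a 4-byte return address). Pushing four GPRs subtracts 16 bytes and leaves the remainder unchanged; subtracting 12 more puts ESP back on a 16-byte boundary before the xCALL, and the epilogue's xADD(esp, 12) undoes exactly that before the pops and xRET. A minimal sketch of that math, with a made-up starting value:

    // Editorial sketch only -- checks the prologue arithmetic with plain integers.
    #include <cstdio>

    int main()
    {
        unsigned esp = 0x1000 - 4;   // entry state: ESP % 16 == 12, assuming the caller
                                     // honored GCC's 16-byte alignment rule at its CALL
        esp -= 4 * 4;                // xPUSH(ebp/ebx/esi/edi): 16 bytes, remainder unchanged
        esp -= 12;                   // xSUB(esp, 12): the GCC-only padding added by this commit
        std::printf("ESP %% 16 at the xCALL site = %u\n", esp % 16);   // prints 0
        return 0;
    }
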
@@ -98,7 +109,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
 	mVU->cycles = cycles;
 	mVU->totalCycles = cycles;
-	x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
+	xSetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
 	return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
 }

---- changed file ----

@@ -1128,7 +1128,7 @@ microVUt(void) mVU_XGKICK_DELAY(mV, bool memVI) {
 	mVUbackupRegs(mVU);
 	if (memVI) MOV32MtoR(gprT2, (uptr)&mVU->VIxgkick);
 	else       mVUallocVIa(mVU, gprT2, _Is_);
-	CALLFunc((uptr)mVU_XGKICK_);
+	xCALL(mVU_XGKICK_);
 	mVUrestoreRegs(mVU);
 }
@@ -1159,7 +1159,6 @@ void setBranchA(mP, int x, int _x_) {
 }
 void condEvilBranch(mV, int JMPcc) {
-	using namespace x86Emitter;
 	if (mVUlow.badBranch) {
 		xMOV(ptr32[&mVU->branch], eax);
 		xMOV(ptr32[&mVU->badBranch], branchAddrN);
@@ -1202,7 +1201,6 @@ mVUop(mVU_BAL) {
 }
 mVUop(mVU_IBEQ) {
-	using namespace x86Emitter;
 	setBranchA(mX, 3, 0);
 	pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
 	pass2 {
@@ -1219,7 +1217,6 @@ mVUop(mVU_IBEQ) {
 }
 mVUop(mVU_IBGEZ) {
-	using namespace x86Emitter;
 	setBranchA(mX, 4, 0);
 	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
 	pass2 {
@@ -1232,7 +1229,6 @@ mVUop(mVU_IBGEZ) {
 }
 mVUop(mVU_IBGTZ) {
-	using namespace x86Emitter;
 	setBranchA(mX, 5, 0);
 	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
 	pass2 {
@@ -1245,7 +1241,6 @@ mVUop(mVU_IBGTZ) {
 }
 mVUop(mVU_IBLEZ) {
-	using namespace x86Emitter;
 	setBranchA(mX, 6, 0);
 	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
 	pass2 {
@@ -1258,7 +1253,6 @@ mVUop(mVU_IBLEZ) {
 }
 mVUop(mVU_IBLTZ) {
-	using namespace x86Emitter;
 	setBranchA(mX, 7, 0);
 	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
 	pass2 {
@@ -1271,7 +1265,6 @@ mVUop(mVU_IBLTZ) {
 }
 mVUop(mVU_IBNE) {
-	using namespace x86Emitter;
 	setBranchA(mX, 8, 0);
 	pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
 	pass2 {

---- changed file ----

@@ -19,8 +19,6 @@
 #include "iR5900.h"
 #include "R5900OpcodeTables.h"
-using namespace x86Emitter;
 extern void _vu0WaitMicro();
 extern void _vu0FinishMicro();

---- changed file ----

@@ -520,7 +520,6 @@ static __pagealigned u8 mVUsearchXMM[0x1000];
 // Generates a custom optimized block-search function
 // Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
 void mVUcustomSearch() {
-	using namespace x86Emitter;
 	HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
 	memset_8<0xcc,0x1000>(mVUsearchXMM);
 	xSetPtr(mVUsearchXMM);

---- changed file ----

@@ -1969,7 +1969,7 @@ void recVUMI_XTOP( VURegs *VU, int info )
 //------------------------------------------------------------------
 // VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp
 //------------------------------------------------------------------
-void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
+void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
 {
 	u32 size;
 	u8* data = ((u8*)pMem + (addr&0x3fff));

---- changed file ----

@@ -61,7 +61,7 @@ struct _vuopinfo {
 void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
 int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
-void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
+void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
 extern int vucycle;
 typedef void (*vFloat)(int regd, int regTemp);

---- changed file ----

@@ -2550,8 +2550,6 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex)
 	svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
 #endif
-	assert(s_vu1esp == 0);
 	VU = vuindex ? &VU1 : &VU0;
 	VU->cycle += s_TotalVUCycles;
@@ -2601,9 +2599,8 @@ __declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
 		mov s_vuedi, edi
 		mov s_vuebx, ebx
-#ifdef PCSX2_DEBUG
 		mov s_vu1esp, esp
-#endif
+		and esp, -16 // align stack for GCC compilance
 		//stmxcsr s_ssecsr
 		ldmxcsr g_sseVUMXCSR
@@ -2629,9 +2626,7 @@ __declspec(naked) static void SuperVUEndProgram()
 		mov edi, s_vuedi
 		mov ebx, s_vuebx
-#ifdef PCSX2_DEBUG
-		sub s_vu1esp, esp
-#endif
+		mov esp, s_vu1esp // restore from aligned stack
 		call SuperVUCleanupProgram
 		jmp s_callstack // so returns correctly
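
Editorial aside (not part of the commit): superVU takes the other common route to the same goal. Its naked-asm entry point cannot know how far off alignment the caller left ESP, so instead of adding fixed padding it saves the incoming value into s_vu1esp, masks ESP down to a 16-byte boundary with `and esp, -16`, and unconditionally restores the saved value on exit, replacing the previous debug-only bookkeeping. A minimal sketch of that save / align-down / restore idiom, with an arbitrary example value:

    // Editorial sketch only -- the align-down-and-restore idiom in plain C++.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        std::uintptr_t esp   = 0x0012ffc6;            // arbitrary, possibly misaligned stack pointer
        std::uintptr_t saved = esp;                   // mov s_vu1esp, esp   (remember the original)
        esp &= ~static_cast<std::uintptr_t>(15);      // and esp, -16        (round down to 16 bytes)
        std::printf("aligned ESP %% 16 = %u\n",
                    static_cast<unsigned>(esp % 16)); // prints 0
        esp = saved;                                  // mov esp, s_vu1esp   (restore on exit)
        return 0;
    }
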
@@ -4337,11 +4332,11 @@ void recVUMI_XGKICK_(VURegs *VU)
 	_freeXMMregs();
 	OR32ItoM((uptr)&psHu32(GIF_STAT), (GIF_STAT_APATH1 | GIF_STAT_OPH)); // Set PATH1 GIF Status Flags
-	PUSH32R(s_XGKICKReg);
-	PUSH32I((uptr)VU->Mem);
-	CALLFunc((uptr)VU1XGKICK_MTGSTransfer);
-	ADD32ItoR(ESP, 8);
+	xMOV(edx, xRegister32(s_XGKICKReg));
+	xMOV(ecx, (uptr)VU->Mem);
+	xCALL(VU1XGKICK_MTGSTransfer);
 	AND32ItoM((uptr)&psHu32(GIF_STAT), ~(GIF_STAT_APATH1 | GIF_STAT_OPH)); // Clear PATH1 GIF Status Flags
 	s_ScheduleXGKICK = 0;
 }
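
Editorial aside (not part of the commit): this last hunk is the superVU side of the __fastcall comments above. With VU1XGKICK_MTGSTransfer declared __fastcall, its two 32-bit arguments travel in ECX and EDX and the callee handles any stack cleanup, so the recompiler drops the old PUSH / PUSH / CALL / ADD ESP,8 sequence that used to move the stack pointer around the callout. Roughly, the declarations compare as below; the "_old" name is hypothetical and shown only for contrast, and __fastcall is the MSVC spelling, assumed here to be mapped by the project's headers for GCC builds:

    typedef unsigned int u32;

    // Old form: cdecl-style call -- arguments pushed right to left, caller pops them
    // afterwards, which is why the emitter needed PUSH32R / PUSH32I / CALLFunc / ADD32ItoR.
    void            VU1XGKICK_MTGSTransfer_old(u32* pMem, u32 addr);   // hypothetical name

    // New form: __fastcall -- pMem in ECX, addr in EDX, ESP left untouched by the caller,
    // so the emitted sequence shrinks to two xMOVs and an xCALL.
    void __fastcall VU1XGKICK_MTGSTransfer(u32* pMem, u32 addr);
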