Implemented an aligned stack for microVU and superVU (microVU's is currently ifdef'd for GCC only, since implementing an aligned stack for compilers that don't automatically assume one would add some complexity and overhead).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2072 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-10-24 23:33:11 +00:00
parent 984e79ae03
commit b96ab5621f
11 changed files with 47 additions and 55 deletions

View File

@ -16,6 +16,13 @@
// Micro VU recompiler! - author: cottonvibes(@gmail.com)
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VU.h"
#include "GS.h"
#include "x86emitter/x86emitter.h"
using namespace x86Emitter;
#include "microVU.h"
//------------------------------------------------------------------

View File

@ -17,10 +17,6 @@
//#define mVUdebug // Prints Extra Info to Console
//#define mVUlogProg // Dumps MicroPrograms to \logs\*.html
#include "Common.h"
#include "VU.h"
#include "GS.h"
#include "x86emitter/x86emitter.h"
#include "microVU_IR.h"
#include "microVU_Misc.h"

View File

@ -90,7 +90,6 @@ microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) {
}
void normBranchCompile(microVU* mVU, u32 branchPC) {
using namespace x86Emitter;
microBlock* pBlock;
blockCreate(branchPC/8);
pBlock = mVUblocks[branchPC/8]->search((microRegInfo*)&mVUregs);
@ -99,7 +98,6 @@ void normBranchCompile(microVU* mVU, u32 branchPC) {
}
void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
using namespace x86Emitter;
memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
mVUsetupBranch(mVU, mFC);
mVUbackupRegs(mVU);
@ -126,7 +124,6 @@ void normBranch(mV, microFlagCycles& mFC) {
}
void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
using namespace x86Emitter;
mVUsetupBranch(mVU, mFC);
xCMP(ptr16[&mVU->branch], 0);
incPC(3);
@ -172,8 +169,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
}
void normJump(mV, microFlagCycles& mFC) {
using namespace x86Emitter;
if (mVUlow.constJump.isValid) { // Jump Address is Constant
if (mVUup.eBit) { // E-bit Jump
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);

View File

@ -333,7 +333,6 @@ microVUt(void) mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) {
microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
using namespace x86Emitter;
microFlagCycles mFC;
u8* thisPtr = x86Ptr;
const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU->microMemSize / 8);
@ -414,7 +413,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
// Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr)
microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) {
using namespace x86Emitter;
if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); }
startPC &= mVU->microMemSize-8;

View File

@ -23,18 +23,23 @@
void mVUdispatcherA(mV) {
mVU->startFunct = x86Ptr;
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!isVU1) { CALLFunc((uptr)mVUexecuteVU0); }
else { CALLFunc((uptr)mVUexecuteVU1); }
// Backup cpu state
PUSH32R(EBX);
PUSH32R(EBP);
PUSH32R(ESI);
PUSH32R(EDI);
xPUSH(ebp);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 12);
#endif
// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xCALL(mVUexecuteVU0); }
else { xCALL(mVUexecuteVU1); }
// Load VU's MXCSR state
SSE_LDMXCSR((uptr)&g_sseVUMXCSR);
xLDMXCSR(&g_sseVUMXCSR);
// Load Regs
#ifdef CHECK_MACROVU0
@ -59,7 +64,7 @@ void mVUdispatcherA(mV) {
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block
JMPR(EAX);
xJMP(eax);
}
// Generates the code to exit from recompiled blocks
@ -67,19 +72,25 @@ void mVUdispatcherB(mV) {
mVU->exitFunct = x86Ptr;
// Load EE's MXCSR state
SSE_LDMXCSR((uptr)&g_sseMXCSR);
xLDMXCSR(&g_sseMXCSR);
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!isVU1) { CALLFunc((uptr)mVUcleanUpVU0); }
else { CALLFunc((uptr)mVUcleanUpVU1); }
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.
if (!isVU1) { xCALL(mVUcleanUpVU0); }
else { xCALL(mVUcleanUpVU1); }
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
POP32R(EDI);
POP32R(ESI);
POP32R(EBP);
POP32R(EBX);
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
RET();
xRET();
mVUcacheCheck(x86Ptr, mVU->cache, 0x1000);
}
@ -98,7 +109,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
mVU->cycles = cycles;
mVU->totalCycles = cycles;
x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
xSetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
}

View File

@ -1128,7 +1128,7 @@ microVUt(void) mVU_XGKICK_DELAY(mV, bool memVI) {
mVUbackupRegs(mVU);
if (memVI) MOV32MtoR(gprT2, (uptr)&mVU->VIxgkick);
else mVUallocVIa(mVU, gprT2, _Is_);
CALLFunc((uptr)mVU_XGKICK_);
xCALL(mVU_XGKICK_);
mVUrestoreRegs(mVU);
}
@ -1159,7 +1159,6 @@ void setBranchA(mP, int x, int _x_) {
}
void condEvilBranch(mV, int JMPcc) {
using namespace x86Emitter;
if (mVUlow.badBranch) {
xMOV(ptr32[&mVU->branch], eax);
xMOV(ptr32[&mVU->badBranch], branchAddrN);
@ -1202,7 +1201,6 @@ mVUop(mVU_BAL) {
}
mVUop(mVU_IBEQ) {
using namespace x86Emitter;
setBranchA(mX, 3, 0);
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
pass2 {
@ -1219,7 +1217,6 @@ mVUop(mVU_IBEQ) {
}
mVUop(mVU_IBGEZ) {
using namespace x86Emitter;
setBranchA(mX, 4, 0);
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
pass2 {
@ -1232,7 +1229,6 @@ mVUop(mVU_IBGEZ) {
}
mVUop(mVU_IBGTZ) {
using namespace x86Emitter;
setBranchA(mX, 5, 0);
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
pass2 {
@ -1245,7 +1241,6 @@ mVUop(mVU_IBGTZ) {
}
mVUop(mVU_IBLEZ) {
using namespace x86Emitter;
setBranchA(mX, 6, 0);
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
pass2 {
@ -1258,7 +1253,6 @@ mVUop(mVU_IBLEZ) {
}
mVUop(mVU_IBLTZ) {
using namespace x86Emitter;
setBranchA(mX, 7, 0);
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
pass2 {
@ -1271,7 +1265,6 @@ mVUop(mVU_IBLTZ) {
}
mVUop(mVU_IBNE) {
using namespace x86Emitter;
setBranchA(mX, 8, 0);
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
pass2 {

View File

@ -19,8 +19,6 @@
#include "iR5900.h"
#include "R5900OpcodeTables.h"
using namespace x86Emitter;
extern void _vu0WaitMicro();
extern void _vu0FinishMicro();

View File

@ -520,7 +520,6 @@ static __pagealigned u8 mVUsearchXMM[0x1000];
// Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch() {
using namespace x86Emitter;
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
memset_8<0xcc,0x1000>(mVUsearchXMM);
xSetPtr(mVUsearchXMM);

View File

@ -1969,7 +1969,7 @@ void recVUMI_XTOP( VURegs *VU, int info )
//------------------------------------------------------------------
// VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp
//------------------------------------------------------------------
void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
{
u32 size;
u8* data = ((u8*)pMem + (addr&0x3fff));

View File

@ -61,7 +61,7 @@ struct _vuopinfo {
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
extern int vucycle;
typedef void (*vFloat)(int regd, int regTemp);

View File

@ -2550,8 +2550,6 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex)
svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
#endif
assert(s_vu1esp == 0);
VU = vuindex ? &VU1 : &VU0;
VU->cycle += s_TotalVUCycles;
@ -2601,9 +2599,8 @@ __declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
mov s_vuedi, edi
mov s_vuebx, ebx
#ifdef PCSX2_DEBUG
mov s_vu1esp, esp
#endif
and esp, -16 // align stack for GCC compliance
//stmxcsr s_ssecsr
ldmxcsr g_sseVUMXCSR
@ -2629,9 +2626,7 @@ __declspec(naked) static void SuperVUEndProgram()
mov edi, s_vuedi
mov ebx, s_vuebx
#ifdef PCSX2_DEBUG
sub s_vu1esp, esp
#endif
mov esp, s_vu1esp // restore from aligned stack
call SuperVUCleanupProgram
jmp s_callstack // so returns correctly
@ -4337,11 +4332,11 @@ void recVUMI_XGKICK_(VURegs *VU)
_freeXMMregs();
OR32ItoM((uptr)&psHu32(GIF_STAT), (GIF_STAT_APATH1 | GIF_STAT_OPH)); // Set PATH1 GIF Status Flags
PUSH32R(s_XGKICKReg);
PUSH32I((uptr)VU->Mem);
CALLFunc((uptr)VU1XGKICK_MTGSTransfer);
ADD32ItoR(ESP, 8);
xMOV(edx, xRegister32(s_XGKICKReg));
xMOV(ecx, (uptr)VU->Mem);
xCALL(VU1XGKICK_MTGSTransfer);
AND32ItoM((uptr)&psHu32(GIF_STAT), ~(GIF_STAT_APATH1 | GIF_STAT_OPH)); // Clear PATH1 GIF Status Flags
s_ScheduleXGKICK = 0;
}