From 722ea233a1239461f7b23e2ac334747604cae838 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Fri, 5 Sep 2008 00:02:53 +0000 Subject: [PATCH] Align stack to 16-byte boundary when compiling in 32-bit mode with GCC. This is required to run anything on OS X and to support SSE instructions on Linux. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@439 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/ABI.cpp | 54 +++++++++++++++++-- Source/Core/Common/Src/ABI.h | 3 ++ Source/Core/Common/Src/Thunk.cpp | 8 +-- Source/Core/Common/Src/x64Emitter.cpp | 12 +++-- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 3 +- .../Plugin_VideoDX9/Src/VertexLoader.cpp | 3 +- .../Plugin_VideoOGL/Src/VertexLoader.cpp | 15 +++--- 7 files changed, 76 insertions(+), 22 deletions(-) diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp index 034c2b77e3..d41225a964 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/ABI.cpp @@ -10,42 +10,48 @@ using namespace Gen; // ==================================== void ABI_CallFunctionC(void *func, u32 param1) { + ABI_AlignStack(1 * 4); PUSH(32, Imm32(param1)); CALL(func); - ADD(32, R(ESP), Imm8(4)); + ABI_RestoreStack(1 * 4); } void ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { + ABI_AlignStack(2 * 4); PUSH(32, Imm32(param2)); PUSH(32, Imm32(param1)); CALL(func); - ADD(32, R(ESP), Imm8(8)); + ABI_RestoreStack(2 * 4); } // Pass a register as a paremeter. void ABI_CallFunctionR(void *func, X64Reg reg1) { + ABI_AlignStack(1 * 4); PUSH(32, R(reg1)); CALL(func); - ADD(32, R(ESP), Imm8(4)); + ABI_RestoreStack(1 * 4); } void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) { + ABI_AlignStack(2 * 4); PUSH(32, R(reg2)); PUSH(32, R(reg1)); CALL(func); - ADD(32, R(ESP), Imm8(8)); + ABI_RestoreStack(2 * 4); } void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { + ABI_AlignStack(2 * 4); PUSH(32, arg1); PUSH(32, Imm32(param2)); CALL(func); - ADD(32, R(ESP), Imm8(8)); + ABI_RestoreStack(2 * 4); } void ABI_PushAllCalleeSavedRegsAndAdjustStack() { + // Note: 4 * 4 = 16 bytes, so alignment is preserved. PUSH(EBP); PUSH(EBX); PUSH(ESI); @@ -59,6 +65,38 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() { POP(EBP); } +void ABI_AlignStack(unsigned int frameSize) { +// Mac OS X requires the stack to be 16-byte aligned before every call. +// Linux requires the stack to be 16-byte aligned before calls that put SSE +// vectors on the stack, but since we do not keep track of which calls do that, +// it is effectively every call as well. +// Windows binaries compiled with MSVC do not have such a restriction, but I +// expect that GCC on Windows acts the same as GCC on Linux in this respect. +// It would be nice if someone could verify this. +#ifdef __GNUC__ + frameSize += 4; // reserve space for return address + unsigned int paddedSize = (frameSize + 15) & -16; + unsigned int fillSize = paddedSize - frameSize; + if (fillSize != 0) { + SUB(32, R(ESP), Imm8(fillSize)); + } +#endif +} + +void ABI_RestoreStack(unsigned int frameSize) { + frameSize += 4; // reserve space for return address + unsigned int paddedSize = +#ifdef __GNUC__ + (frameSize + 15) & -16; +#else + frameSize; +#endif + paddedSize -= 4; // return address is already consumed + if (paddedSize != 0) { + ADD(32, R(ESP), Imm8(paddedSize)); + } +} + #else // Shared code between Win64 and Unix64 @@ -99,6 +137,12 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) CALL(func); } +void ABI_AlignStack(unsigned int /*frameSize*/) { +} + +void ABI_RestoreStack(unsigned int /*frameSize*/) { +} + #ifdef _WIN32 // Win64 Specific Code diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/ABI.h index 2c6f59fac6..b84e92bef9 100644 --- a/Source/Core/Common/Src/ABI.h +++ b/Source/Core/Common/Src/ABI.h @@ -103,6 +103,9 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack(); void ABI_PushAllCallerSavedRegsAndAdjustStack(); void ABI_PopAllCallerSavedRegsAndAdjustStack(); +void ABI_AlignStack(unsigned int frameSize); +void ABI_RestoreStack(unsigned int frameSize); + #ifdef _M_IX86 inline int ABI_GetNumXMMRegs() { return 8; } #else diff --git a/Source/Core/Common/Src/Thunk.cpp b/Source/Core/Common/Src/Thunk.cpp index 8dcf6ef35a..086ad66cbb 100644 --- a/Source/Core/Common/Src/Thunk.cpp +++ b/Source/Core/Common/Src/Thunk.cpp @@ -136,17 +136,17 @@ void *ProtectFunction(void *function, int num_params) // Since parameters are in the previous stack frame, not in registers, this takes some // trickery : we simply re-push the parameters. might not be optimal, but that doesn't really // matter. + ABI_AlignStack(num_params * 4); for (int i = 0; i < num_params; i++) { // ESP is changing, so we do not need i - PUSH(32, MDisp(ESP, (num_params) * 4)); + PUSH(32, MDisp(ESP, num_params * 4)); } CALL(function); - if (num_params) - ADD(32, R(ESP), Imm8(num_params * 4)); + ABI_RestoreStack(num_params * 4); CALL((void*)load_regs); RET(); #endif thunks[function] = call_point; return (void *)call_point; -} +} diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 2eea4a05c2..6f69e31fd3 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1317,6 +1317,7 @@ void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) #endif #else + ABI_AlignStack(3 * 4); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); PUSH(32, Imm32(arg0)); @@ -1324,7 +1325,7 @@ void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) #ifdef _WIN32 // don't inc stack #else - ADD(32, R(ESP), Imm8(3 * 4)); + ABI_RestoreStack(3 * 4); #endif #endif } @@ -1349,6 +1350,7 @@ void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) #endif #else + ABI_AlignStack(4 * 4); PUSH(32, Imm32(arg3)); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); @@ -1357,7 +1359,7 @@ void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) #ifdef _WIN32 // don't inc stack #else - ADD(32, R(ESP), Imm8(4 * 4)); + ABI_RestoreStack(4 * 4); #endif #endif } @@ -1384,6 +1386,7 @@ void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 #endif #else + ABI_AlignStack(5 * 4); PUSH(32, Imm32(arg4)); PUSH(32, Imm32(arg3)); PUSH(32, Imm32(arg2)); @@ -1393,7 +1396,7 @@ void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 #ifdef _WIN32 // don't inc stack #else - ADD(32, R(ESP), Imm8(5 * 4)); + ABI_RestoreStack(5 * 4); #endif #endif } @@ -1422,6 +1425,7 @@ void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 #endif #else + ABI_AlignStack(6 * 4); PUSH(32, Imm32(arg5)); PUSH(32, Imm32(arg4)); PUSH(32, Imm32(arg3)); @@ -1432,7 +1436,7 @@ void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 #ifdef _WIN32 // don't inc stack #else - ADD(32, R(ESP), Imm8(6 * 4)); + ABI_RestoreStack(6 * 4); #endif #endif } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index cf6bddd277..2959f3544a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -137,9 +137,10 @@ void Generate() SetJumpTarget(notfound); //Ok, no block, let's jit + ABI_AlignStack(4); PUSH(32, M(&PowerPC::ppcState.pc)); CALL(reinterpret_cast(&Jit)); - ADD(32, R(ESP), Imm8(4)); + ABI_RestoreStack(4); JMP(dispatcherNoCheck); // no point in special casing this //FP blocks test for FPU available, jump here if false diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexLoader.cpp index 674da64d73..3fa3e17042 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexLoader.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexLoader.cpp @@ -342,9 +342,10 @@ void VertexLoader::Compile() for (int i = 0; i < m_numPipelineStates; i++) { + ABI_AlignStack(1 * 4); PUSH(32, Imm32((u32)&m_VtxAttr)); CALL(m_PipelineStates[i]); - ADD(32, R(ESP), Imm8(4)); + ABI_RestoreStack(1 * 4); } ADD(32, M(&varray->count), Imm8(1)); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp index 73fe9e9750..f88ad64741 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp @@ -21,6 +21,7 @@ #include "Common.h" #include "x64Emitter.h" +#include "ABI.h" #include "Profiler.h" #include "StringUtil.h" @@ -536,21 +537,21 @@ void VertexLoader::ProcessFormat() int id = GL_TEXTURE0+i; #ifdef _M_X64 - #ifdef _MSC_VER MOV(32, R(RCX), Imm32(id)); - CALL((void *)glClientActiveTexture); #else MOV(32, R(RDI), Imm32(id)); - CALL((void *)glClientActiveTexture); #endif - #else + ABI_AlignStack(1 * 4); PUSH(32, Imm32(id)); +#endif CALL((void *)glClientActiveTexture); - // don't inc stack on windows, stdcall -#ifndef _WIN32 - ADD(32, R(ESP), Imm8(4)); +#ifndef _M_X64 +#ifdef _WIN32 + // don't inc stack on windows, stdcall +#else + ABI_RestoreStack(1 * 4); #endif #endif if( m_components&(VB_HAS_TEXMTXIDX0<