Align stack to 16-byte boundary when compiling in 32-bit mode with GCC. This is required to run anything on OS X and to support SSE instructions on Linux.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@439 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Maarten ter Huurne 2008-09-05 00:02:53 +00:00
parent f96d19a1fb
commit 722ea233a1
7 changed files with 76 additions and 22 deletions

View File

@ -10,42 +10,48 @@ using namespace Gen;
// ====================================
// Emits a caller-cleanup call to func with one 32-bit immediate argument
// passed on the stack.
//   func   - address of the function to call
//   param1 - value pushed as the single argument
// ABI_AlignStack pads the stack before the push. ABI_RestoreStack then
// releases both that padding and the 4 argument bytes, so no separate
// ADD ESP may follow the call: it would pop the argument a second time.
void ABI_CallFunctionC(void *func, u32 param1) {
    ABI_AlignStack(1 * 4);
    PUSH(32, Imm32(param1));
    CALL(func);
    ABI_RestoreStack(1 * 4);
}
// Emits a caller-cleanup call to func with two 32-bit immediate arguments
// passed on the stack.
//   func   - address of the function to call
//   param1 - first argument (pushed last, so it ends up on top of the stack)
//   param2 - second argument (pushed first)
// ABI_RestoreStack releases the alignment padding together with the 8
// argument bytes; an additional ADD ESP here would unbalance the stack.
void ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
    ABI_AlignStack(2 * 4);
    PUSH(32, Imm32(param2));
    PUSH(32, Imm32(param1));
    CALL(func);
    ABI_RestoreStack(2 * 4);
}
// Pass a register as a parameter.
// Emits a caller-cleanup call to func, passing the 32-bit contents of reg1
// as its single stack argument.
//   func - address of the function to call
//   reg1 - register whose value is pushed as the argument
// Stack cleanup is left entirely to ABI_RestoreStack, which removes the
// alignment padding and the 4 argument bytes in one adjustment.
void ABI_CallFunctionR(void *func, X64Reg reg1) {
    ABI_AlignStack(1 * 4);
    PUSH(32, R(reg1));
    CALL(func);
    ABI_RestoreStack(1 * 4);
}
// Emits a caller-cleanup call to func, passing the 32-bit contents of two
// registers as stack arguments.
//   func - address of the function to call
//   reg1 - register holding the first argument (pushed last)
//   reg2 - register holding the second argument (pushed first)
// Stack cleanup is left entirely to ABI_RestoreStack, which removes the
// alignment padding and the 8 argument bytes in one adjustment.
void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
{
    ABI_AlignStack(2 * 4);
    PUSH(32, R(reg2));
    PUSH(32, R(reg1));
    CALL(func);
    ABI_RestoreStack(2 * 4);
}
// Emits a caller-cleanup call to func with an arbitrary operand as the first
// argument and a 32-bit immediate as the second.
//   func   - address of the function to call
//   arg1   - operand pushed as the first argument
//   param2 - immediate pushed as the second argument
// Arguments are pushed in reverse order (second argument first), matching
// ABI_CallFunctionCC and ABI_CallFunctionRR, so that arg1 ends up on top of
// the stack where the callee expects its first parameter.
// NOTE(review): arg1 is pushed after ESP has already moved; an ESP-relative
// operand would need its displacement adjusted by the caller - confirm no
// caller passes one.
// Stack cleanup is left entirely to ABI_RestoreStack, which removes the
// alignment padding and the 8 argument bytes in one adjustment.
void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
{
    ABI_AlignStack(2 * 4);
    PUSH(32, Imm32(param2));
    PUSH(32, arg1);
    CALL(func);
    ABI_RestoreStack(2 * 4);
}
void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
// Note: 4 * 4 = 16 bytes, so alignment is preserved.
PUSH(EBP);
PUSH(EBX);
PUSH(ESI);
@ -59,6 +65,38 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
POP(EBP);
}
// Emits the stack padding needed before pushing frameSize bytes of call
// arguments, so that arguments plus one 4-byte return address slot add up
// to a multiple of 16 bytes.
//   frameSize - number of argument bytes the caller is about to push
//
// Why: Mac OS X demands a 16-byte aligned stack at every call. Linux only
// demands it for calls that place SSE vectors on the stack, but since calls
// are not tracked at that granularity, every call is treated the same way.
// MSVC-built Windows binaries have no such requirement; GCC on Windows is
// expected to behave like GCC on Linux here (unverified).
// When not compiling with GCC, nothing is emitted.
void ABI_AlignStack(unsigned int frameSize) {
#ifdef __GNUC__
    // Account for the return address slot along with the arguments.
    const unsigned int occupied = frameSize + 4;
    // Distance from 'occupied' up to the next multiple of 16 (0 if already there).
    const unsigned int padding = (16 - (occupied & 15)) & 15;
    if (padding > 0) {
        SUB(32, R(ESP), Imm8(padding));
    }
#endif
}
// Emits the stack adjustment that undoes a preceding ABI_AlignStack(frameSize)
// plus the frameSize bytes of arguments pushed after it.
//   frameSize - number of argument bytes that were pushed for the call
// With GCC the released amount is the argument bytes plus the alignment
// padding; with other compilers it is exactly frameSize. Nothing is emitted
// when the amount is zero.
void ABI_RestoreStack(unsigned int frameSize) {
    // Start from arguments + return address slot, as ABI_AlignStack does.
    unsigned int releaseSize = frameSize + 4;
#ifdef __GNUC__
    // Round up to the 16-byte boundary that was established before the call.
    releaseSize = (releaseSize + 15) & -16;
#endif
    // The return address was already popped by the callee's RET.
    releaseSize -= 4;
    if (releaseSize == 0) {
        return;
    }
    ADD(32, R(ESP), Imm8(releaseSize));
}
#else
// Shared code between Win64 and Unix64
@ -99,6 +137,12 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
CALL(func);
}
// Variant for the shared Win64/Unix64 section: emits nothing.
// The parameter exists only to keep the signature identical to the 32-bit
// version, so callers can invoke it unconditionally.
void ABI_AlignStack(unsigned int frameSize) {
    (void)frameSize;  // intentionally unused
}
// Variant for the shared Win64/Unix64 section: emits nothing.
// The parameter exists only to keep the signature identical to the 32-bit
// version, so callers can invoke it unconditionally.
void ABI_RestoreStack(unsigned int frameSize) {
    (void)frameSize;  // intentionally unused
}
#ifdef _WIN32
// Win64 Specific Code

View File

@ -103,6 +103,9 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack();
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();
void ABI_AlignStack(unsigned int frameSize);
void ABI_RestoreStack(unsigned int frameSize);
#ifdef _M_IX86
inline int ABI_GetNumXMMRegs() { return 8; }
#else

View File

@ -136,13 +136,13 @@ void *ProtectFunction(void *function, int num_params)
// Since parameters are in the previous stack frame, not in registers, this takes some
// trickery: we simply re-push the parameters. This might not be optimal, but that doesn't
// really matter.
ABI_AlignStack(num_params * 4);
for (int i = 0; i < num_params; i++) {
// ESP is changing, so we do not need i
PUSH(32, MDisp(ESP, (num_params) * 4));
PUSH(32, MDisp(ESP, num_params * 4));
}
CALL(function);
if (num_params)
ADD(32, R(ESP), Imm8(num_params * 4));
ABI_RestoreStack(num_params * 4);
CALL((void*)load_regs);
RET();
#endif

View File

@ -1317,6 +1317,7 @@ void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2)
#endif
#else
ABI_AlignStack(3 * 4);
PUSH(32, Imm32(arg2));
PUSH(32, Imm32(arg1));
PUSH(32, Imm32(arg0));
@ -1324,7 +1325,7 @@ void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2)
#ifdef _WIN32
// don't inc stack
#else
ADD(32, R(ESP), Imm8(3 * 4));
ABI_RestoreStack(3 * 4);
#endif
#endif
}
@ -1349,6 +1350,7 @@ void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3)
#endif
#else
ABI_AlignStack(4 * 4);
PUSH(32, Imm32(arg3));
PUSH(32, Imm32(arg2));
PUSH(32, Imm32(arg1));
@ -1357,7 +1359,7 @@ void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3)
#ifdef _WIN32
// don't inc stack
#else
ADD(32, R(ESP), Imm8(4 * 4));
ABI_RestoreStack(4 * 4);
#endif
#endif
}
@ -1384,6 +1386,7 @@ void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32
#endif
#else
ABI_AlignStack(5 * 4);
PUSH(32, Imm32(arg4));
PUSH(32, Imm32(arg3));
PUSH(32, Imm32(arg2));
@ -1393,7 +1396,7 @@ void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32
#ifdef _WIN32
// don't inc stack
#else
ADD(32, R(ESP), Imm8(5 * 4));
ABI_RestoreStack(5 * 4);
#endif
#endif
}
@ -1422,6 +1425,7 @@ void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32
#endif
#else
ABI_AlignStack(6 * 4);
PUSH(32, Imm32(arg5));
PUSH(32, Imm32(arg4));
PUSH(32, Imm32(arg3));
@ -1432,7 +1436,7 @@ void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32
#ifdef _WIN32
// don't inc stack
#else
ADD(32, R(ESP), Imm8(6 * 4));
ABI_RestoreStack(6 * 4);
#endif
#endif
}

View File

@ -137,9 +137,10 @@ void Generate()
SetJumpTarget(notfound);
//Ok, no block, let's jit
ABI_AlignStack(4);
PUSH(32, M(&PowerPC::ppcState.pc));
CALL(reinterpret_cast<void *>(&Jit));
ADD(32, R(ESP), Imm8(4));
ABI_RestoreStack(4);
JMP(dispatcherNoCheck); // no point in special casing this
//FP blocks test for FPU available, jump here if false

View File

@ -342,9 +342,10 @@ void VertexLoader::Compile()
for (int i = 0; i < m_numPipelineStates; i++)
{
ABI_AlignStack(1 * 4);
PUSH(32, Imm32((u32)&m_VtxAttr));
CALL(m_PipelineStates[i]);
ADD(32, R(ESP), Imm8(4));
ABI_RestoreStack(1 * 4);
}
ADD(32, M(&varray->count), Imm8(1));

View File

@ -21,6 +21,7 @@
#include "Common.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "Profiler.h"
#include "StringUtil.h"
@ -536,21 +537,21 @@ void VertexLoader::ProcessFormat()
int id = GL_TEXTURE0+i;
#ifdef _M_X64
#ifdef _MSC_VER
MOV(32, R(RCX), Imm32(id));
CALL((void *)glClientActiveTexture);
#else
MOV(32, R(RDI), Imm32(id));
CALL((void *)glClientActiveTexture);
#endif
#else
ABI_AlignStack(1 * 4);
PUSH(32, Imm32(id));
#endif
CALL((void *)glClientActiveTexture);
// don't inc stack on windows, stdcall
#ifndef _WIN32
ADD(32, R(ESP), Imm8(4));
#ifndef _M_X64
#ifdef _WIN32
// don't inc stack on windows, stdcall
#else
ABI_RestoreStack(1 * 4);
#endif
#endif
if( m_components&(VB_HAS_TEXMTXIDX0<<i) ) {