diff --git a/Source/Core/Common/Src/x64ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp index b5726947fc..c47ea02fb4 100644 --- a/Source/Core/Common/Src/x64ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -10,51 +10,38 @@ using namespace Gen; // Shared code between Win64 and Unix64 -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog) { - // On platforms other than Windows 32-bit: At the beginning of a function, - // the stack pointer is 4/8 bytes less than a multiple of 16; however, the - // function prolog immediately subtracts an appropriate amount to align - // it, so no alignment is required around a call. - // In the functions generated by ThunkManager::ProtectFunction, we add the - // necessary subtraction (and 0x20 bytes shadow space for Win64) into this - // rather than having a separate prolog. - // On Windows 32-bit, the required alignment is only 4 bytes, so we just - // ensure that the frame size isn't misaligned. -#ifdef _M_X64 - // expect frameSize == 0 - frameSize = noProlog ? 0x28 : 0; -#elif defined(_WIN32) - frameSize = (frameSize + 3) & -4; -#else - unsigned int existingAlignment = noProlog ? 0xc : 0; - frameSize -= existingAlignment; - frameSize = (frameSize + 15) & -16; - frameSize += existingAlignment; +// Sets up a __cdecl function. +void XEmitter::ABI_EmitPrologue(int maxCallParams) +{ +#ifdef _M_IX86 + // Don't really need to do anything +#elif defined(_M_X64) +#if _WIN32 + int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8; + // Set up a stack frame so that we can call functions + // TODO: use maxCallParams + SUB(64, R(RSP), Imm8(stacksize)); +#endif +#else +#error Arch not supported #endif - return frameSize; } -void XEmitter::ABI_AlignStack(unsigned int frameSize, bool noProlog) { - unsigned int fillSize = - ABI_GetAlignedFrameSize(frameSize, noProlog) - frameSize; - if (fillSize != 0) { -#ifdef _M_X64 - SUB(64, R(RSP), Imm8(fillSize)); -#else - SUB(32, R(ESP), Imm8(fillSize)); +void XEmitter::ABI_EmitEpilogue(int maxCallParams) +{ +#ifdef _M_IX86 + RET(); +#elif defined(_M_X64) +#ifdef _WIN32 + int stacksize = ((maxCallParams+1)&~1)*8 + 8; + ADD(64, R(RSP), Imm8(stacksize)); #endif - } -} + RET(); +#else +#error Arch not supported + -void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) { - unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize, noProlog); - if (alignedSize != 0) { -#ifdef _M_X64 - ADD(64, R(RSP), Imm8(alignedSize)); -#else - ADD(32, R(ESP), Imm8(alignedSize)); #endif - } } #ifdef _M_IX86 // All32 @@ -78,7 +65,7 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { PUSH(16, Imm16(param2)); PUSH(32, Imm32(param1)); CALL(func); - ABI_AlignStack(1 * 2 + 1 * 4); + ABI_RestoreStack(1 * 2 + 1 * 4); } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { @@ -169,27 +156,60 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) } void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + // Note: 4 * 4 = 16 bytes, so alignment is preserved. PUSH(EBP); - MOV(32, R(EBP), R(ESP)); PUSH(EBX); PUSH(ESI); PUSH(EDI); - SUB(32, R(ESP), Imm8(0xc)); } void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - ADD(32, R(ESP), Imm8(0xc)); POP(EDI); POP(ESI); POP(EBX); POP(EBP); } +unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { + frameSize += 4; // reserve space for return address + unsigned int alignedSize = +#ifdef __GNUC__ + (frameSize + 15) & -16; +#else + (frameSize + 3) & -4; +#endif + return alignedSize; +} + + +void XEmitter::ABI_AlignStack(unsigned int frameSize) { +// Mac OS X requires the stack to be 16-byte aligned before every call. +// Linux requires the stack to be 16-byte aligned before calls that put SSE +// vectors on the stack, but since we do not keep track of which calls do that, +// it is effectively every call as well. +// Windows binaries compiled with MSVC do not have such a restriction*, but I +// expect that GCC on Windows acts the same as GCC on Linux in this respect. +// It would be nice if someone could verify this. +// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times. + unsigned int fillSize = + ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4); + if (fillSize != 0) { + SUB(32, R(ESP), Imm8(fillSize)); + } +} + +void XEmitter::ABI_RestoreStack(unsigned int frameSize) { + unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); + alignedSize -= 4; // return address is POPped at end of call + if (alignedSize != 0) { + ADD(32, R(ESP), Imm8(alignedSize)); + } +} + #else //64bit // Common functions void XEmitter::ABI_CallFunction(void *func) { - ABI_AlignStack(0); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { @@ -199,11 +219,9 @@ void XEmitter::ABI_CallFunction(void *func) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL @@ -214,11 +232,9 @@ void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -230,11 +246,9 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL @@ -245,11 +259,9 @@ void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -261,11 +273,9 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -278,11 +288,9 @@ void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, R(ABI_PARAM3), Imm64((u64)param3)); @@ -295,11 +303,9 @@ void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *par } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 param3, void *param4) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -313,11 +319,9 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) { - ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -330,12 +334,10 @@ void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 p } else { CALL(func); } - ABI_RestoreStack(0); } // Pass a register as a parameter. void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { - ABI_AlignStack(0); if (reg1 != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R(reg1)); u64 distance = u64(func) - (u64(code) + 5); @@ -347,12 +349,10 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { } else { CALL(func); } - ABI_RestoreStack(0); } // Pass two registers as parameters. void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { - ABI_AlignStack(0); if (reg2 != ABI_PARAM1) { if (reg1 != ABI_PARAM1) MOV(64, R(ABI_PARAM1), R(reg1)); @@ -373,12 +373,10 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); @@ -391,12 +389,10 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2 } else { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); u64 distance = u64(func) - (u64(code) + 5); @@ -408,7 +404,95 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) } else { CALL(func); } +} + +unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { + return frameSize; +} + +#ifdef _WIN32 + +// Win64 Specific Code +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + //we only want to do this once + PUSH(RBX); + PUSH(RSI); + PUSH(RDI); + PUSH(RBP); + PUSH(R12); + PUSH(R13); + PUSH(R14); + PUSH(R15); + //TODO: Also preserve XMM0-3? + ABI_AlignStack(0); +} + +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { ABI_RestoreStack(0); + POP(R15); + POP(R14); + POP(R13); + POP(R12); + POP(RBP); + POP(RDI); + POP(RSI); + POP(RBX); +} + +// Win64 Specific Code +void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { + PUSH(RCX); + PUSH(RDX); + PUSH(RSI); + PUSH(RDI); + PUSH(R8); + PUSH(R9); + PUSH(R10); + PUSH(R11); + //TODO: Also preserve XMM0-15? + ABI_AlignStack(0); +} + +void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { + ABI_RestoreStack(0); + POP(R11); + POP(R10); + POP(R9); + POP(R8); + POP(RDI); + POP(RSI); + POP(RDX); + POP(RCX); +} + +void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { + SUB(64, R(RSP), Imm8(0x28)); +} + +void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { + ADD(64, R(RSP), Imm8(0x28)); +} + +#else +// Unix64 Specific Code +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + PUSH(RBX); + PUSH(RBP); + PUSH(R12); + PUSH(R13); + PUSH(R14); + PUSH(R15); + PUSH(R15); //just to align stack. duped push/pop doesn't hurt. +} + +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { + POP(R15); + POP(R15); + POP(R14); + POP(R13); + POP(R12); + POP(RBP); + POP(RBX); } void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { @@ -435,59 +519,12 @@ void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { POP(RCX); } - -#ifdef _WIN32 -// Win64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - //we only want to do this once - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(0x28)); - //TODO: Also preserve XMM0-3? +void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { + SUB(64, R(RSP), Imm8(0x08)); } -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - ADD(64, R(RSP), Imm8(0x28)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RDI); - POP(RSI); - POP(RBX); - POP(RBP); -} - -#else -// Unix64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(8)); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - ADD(64, R(RSP), Imm8(8)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBX); - POP(RBP); +void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { + ADD(64, R(RSP), Imm8(0x08)); } #endif // WIN32 diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index c42c955ec4..b068579a3a 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -652,9 +652,14 @@ public: void ABI_PushAllCallerSavedRegsAndAdjustStack(); void ABI_PopAllCallerSavedRegsAndAdjustStack(); - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); - void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); - void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); + unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); + void ABI_AlignStack(unsigned int frameSize); + void ABI_RestoreStack(unsigned int frameSize); + + // Sets up a __cdecl function. + // Only x64 really needs the parameter count. + void ABI_EmitPrologue(int maxCallParams); + void ABI_EmitEpilogue(int maxCallParams); #ifdef _M_IX86 inline int ABI_GetNumXMMRegs() { return 8; } diff --git a/Source/Core/Common/Src/x64Thunk.cpp b/Source/Core/Common/Src/x64Thunk.cpp index a9c19060de..efb876ce81 100644 --- a/Source/Core/Common/Src/x64Thunk.cpp +++ b/Source/Core/Common/Src/x64Thunk.cpp @@ -91,27 +91,35 @@ void *ThunkManager::ProtectFunction(void *function, int num_params) PanicAlert("Trying to protect functions before the emu is started. Bad bad bad."); const u8 *call_point = GetCodePtr(); -#ifdef _M_X64 // Make sure to align stack. - ABI_AlignStack(0, true); +#ifdef _M_X64 +#ifdef _WIN32 + SUB(64, R(ESP), Imm8(0x28)); +#else + SUB(64, R(ESP), Imm8(0x8)); +#endif CALL((void*)save_regs); CALL((void*)function); CALL((void*)load_regs); - ABI_RestoreStack(0, true); +#ifdef _WIN32 + ADD(64, R(ESP), Imm8(0x28)); +#else + ADD(64, R(ESP), Imm8(0x8)); +#endif RET(); #else CALL((void*)save_regs); // Since parameters are in the previous stack frame, not in registers, this takes some // trickery : we simply re-push the parameters. might not be optimal, but that doesn't really // matter. - ABI_AlignStack(num_params * 4, true); + ABI_AlignStack(num_params * 4); unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4); for (int i = 0; i < num_params; i++) { // ESP is changing, so we do not need i - PUSH(32, MDisp(ESP, alignedSize)); + PUSH(32, MDisp(ESP, alignedSize - 4)); } CALL(function); - ABI_RestoreStack(num_params * 4, true); + ABI_RestoreStack(num_params * 4); CALL((void*)load_regs); RET(); #endif diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 825e9c6558..babaff83a5 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -217,7 +217,7 @@ void VertexLoader::CompileVertexTranslator() PanicAlert("Trying to recompile a vertex translator"); m_compiledCode = GetCodePtr(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + ABI_EmitPrologue(4); // Start loop here const u8 *loop_start = GetCodePtr(); @@ -499,8 +499,7 @@ void VertexLoader::CompileVertexTranslator() #endif J_CC(CC_NZ, loop_start, true); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); - RET(); + ABI_EmitEpilogue(4); #endif m_NativeFmt->Initialize(vtx_decl); } diff --git a/Source/Core/VideoCommon/Src/x64DLCache.cpp b/Source/Core/VideoCommon/Src/x64DLCache.cpp index b412c2e274..ae32ea5ea9 100644 --- a/Source/Core/VideoCommon/Src/x64DLCache.cpp +++ b/Source/Core/VideoCommon/Src/x64DLCache.cpp @@ -409,7 +409,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) emitter.AlignCode4(); dl->compiled_code = emitter.GetCodePtr(); - emitter.ABI_PushAllCalleeSavedRegsAndAdjustStack(); + emitter.ABI_EmitPrologue(4); while (g_pVideoData < end) { @@ -572,8 +572,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) break; } } - emitter.ABI_PopAllCalleeSavedRegsAndAdjustStack(); - emitter.RET(); + emitter.ABI_EmitEpilogue(4); INCSTAT(stats.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled); Statistics::SwapDL();