diff --git a/common/include/x86emitter/implement/jmpcall.h b/common/include/x86emitter/implement/jmpcall.h index 7f8ef72744..426d4600dc 100644 --- a/common/include/x86emitter/implement/jmpcall.h +++ b/common/include/x86emitter/implement/jmpcall.h @@ -68,5 +68,134 @@ struct xImpl_JmpCall } }; +// Yes, it is awful: the template code lives in a header and creates a circular dependency, so xMOV and xCALL are redeclared here. +extern const xImpl_Mov xMOV; +extern const xImpl_JmpCall xCALL; + +struct xImpl_FastCall +{ + // FIXME: the current 64-bit path is mostly a copy/paste; potentially it would require pushing/popping + // some registers. But I think it is enough to handle the first call. + + + // Type unsafety is nice: the macros below paste func/a1/a2 from the enclosing operator() into xCALL/xMOV, whatever their types. +#ifdef __x86_64__ + +#define XFASTCALL \ + xCALL(func); + +#define XFASTCALL1 \ + xMOV(rdi, a1); \ + xCALL(func); + +#define XFASTCALL2 \ + xMOV(rdi, a1); \ + xMOV(rsi, a2); \ + xCALL(func); + +#else + +#define XFASTCALL \ + xCALL(func); + +#define XFASTCALL1 \ + xMOV(ecx, a1); \ + xCALL(func); + +#define XFASTCALL2 \ + xMOV(ecx, a1); \ + xMOV(edx, a2); \ + xCALL(func); + +#endif + + template< typename T > __fi __always_inline_tmpl_fail + void operator()( T* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const + { +#ifdef __x86_64__ + if (a1.IsEmpty()) { + XFASTCALL; + } else if (a2.IsEmpty()) { + XFASTCALL1; + } else { + XFASTCALL2; + } +#else + if (a1.IsEmpty()) { + XFASTCALL; + } else if (a2.IsEmpty()) { + XFASTCALL1; + } else { + XFASTCALL2; + } +#endif + } + + template< typename T > __fi __always_inline_tmpl_fail + void operator()( T* func, u32 a1, const xRegister32& a2) const + { +#ifdef __x86_64__ + XFASTCALL2; +#else + XFASTCALL2; +#endif + } + + template< typename T > __fi __always_inline_tmpl_fail + void operator()( T* func, const xIndirectVoid& a1) const + { +#ifdef __x86_64__ + XFASTCALL1; +#else + XFASTCALL1; +#endif + } + + template< typename T > __fi __always_inline_tmpl_fail + void operator()( T* func, u32 a1, u32 a2) const + { +#ifdef __x86_64__ + XFASTCALL2; +#else + 
XFASTCALL2; +#endif + } + + template< typename T > __fi __always_inline_tmpl_fail + void operator()( T* func, u32 a1) const + { +#ifdef __x86_64__ + XFASTCALL1; +#else + XFASTCALL1; +#endif + } + + void operator()(const xIndirect32& func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const + { +#ifdef __x86_64__ + if (a1.IsEmpty()) { + XFASTCALL; + } else if (a2.IsEmpty()) { + XFASTCALL1; + } else { + XFASTCALL2; + } +#else + if (a1.IsEmpty()) { + XFASTCALL; + } else if (a2.IsEmpty()) { + XFASTCALL1; + } else { + XFASTCALL2; + } +#endif + } + +#undef XFASTCALL +#undef XFASTCALL1 +#undef XFASTCALL2 +}; + } // End namespace x86Emitter diff --git a/common/include/x86emitter/instructions.h b/common/include/x86emitter/instructions.h index 8d80012ae9..9441204624 100644 --- a/common/include/x86emitter/instructions.h +++ b/common/include/x86emitter/instructions.h @@ -93,6 +93,7 @@ namespace x86Emitter #else extern const xImpl_JmpCall xCALL; #endif + extern const xImpl_FastCall xFastCall; // ------------------------------------------------------------------------ extern const xImpl_CMov @@ -183,19 +184,15 @@ namespace x86Emitter extern void xINTO(); ////////////////////////////////////////////////////////////////////////////////////////// - // Helper function to handle the various functions ABI - extern void xFastCall(void* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg); - extern void xFastCall(void* func, const xRegisterSSE& a1, const xRegisterSSE& a2); - extern void xFastCall(void* func, u32 a1, u32 a2); - extern void xFastCall(void* func, u32 a1); - - extern void xStdCall(void* func, u32 a1); - + // Helper object to handle the various functions ABI class xScopedStackFrame { bool m_base_frame; + bool m_save_base_pointer; + int m_offset; - xScopedStackFrame(bool base_frame); + public: + xScopedStackFrame(bool base_frame, bool save_base_pointer = false, int offset = 0); ~xScopedStackFrame(); }; diff --git 
a/common/src/x86emitter/jmp.cpp b/common/src/x86emitter/jmp.cpp index b2b89ef2c2..c56f0b1abc 100644 --- a/common/src/x86emitter/jmp.cpp +++ b/common/src/x86emitter/jmp.cpp @@ -42,6 +42,8 @@ void xImpl_JmpCall::operator()( const xIndirect16& src ) const { xOpWrite( 0x6 const xImpl_JmpCall xJMP = { true }; const xImpl_JmpCall xCALL = { false }; +const xImpl_FastCall xFastCall = { }; + void xSmartJump::SetTarget() { u8* target = xGetPtr(); diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index a1a476e365..8a362ccc96 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -1022,123 +1022,99 @@ __emitinline void xRestoreReg( const xRegisterSSE& dest ) xMOVDQA( dest, ptr[&xmm_data[dest.Id*2]] ); } -////////////////////////////////////////////////////////////////////////////////////////// -// Helper function to handle the various functions ABI - -__emitinline void xFastCall(void *func, const xRegister32& a1, const xRegister32& a2) -{ -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else - if (!a1.IsEmpty()) - xMOV(ecx, a1); - - if (!a2.IsEmpty()) - xMOV(edx, a2); - - xCALL(func); -#endif -} - -__emitinline void xFastCall(void *func, const xRegisterSSE& a1, const xRegisterSSE& a2) -{ -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else - xMOVD(ecx, a1); - xMOVD(edx, a2); - - xCALL(func); -#endif -} - -__emitinline void xFastCall(void *func, u32 a1, u32 a2) -{ -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else - xMOV(ecx, a1); - xMOV(edx, a2); - - xCALL(func); -#endif -} - -__emitinline void xFastCall(void *func, u32 a1) -{ -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else - xMOV(ecx, a1); - - xCALL(func); -#endif -} - -__emitinline void xStdCall(void *func, u32 a1) -{ -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else - // GCC note: unlike C call, GCC doesn't requires - // strict 16B alignment on std call - xPUSH(a1); - xCALL(func); -#endif -} - 
////////////////////////////////////////////////////////////////////////////////////////// // Helper object to handle ABI frame +#ifdef __GNUC__ -xScopedStackFrame::xScopedStackFrame(bool base_frame) +#ifdef __x86_64__ +// GCC ensures/requires stack to be 16 bytes aligned (but when?) +#define ALIGN_STACK(v) xADD(rsp, v) +#else +// GCC ensures/requires stack to be 16 bytes aligned before the call +// Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes. +// EBP will take 4 bytes if m_base_frame is enabled +#define ALIGN_STACK(v) xADD(esp, v) +#endif + +#else + +#define ALIGN_STACK(v) + +#endif + +xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset) { m_base_frame = base_frame; + m_save_base_pointer = save_base_pointer; + m_offset = offset; #ifdef __x86_64__ - // NEW ABI - pxAssert(0); + + m_offset += 8; // Call stores the return address (8 bytes) + + // Note rbp can surely be optimized in 64 bits + if (m_base_frame) { + xPUSH( rbp ); + xMOV( rbp, rsp ); + m_offset += 8; + } else if (m_save_base_pointer) { + xPUSH( rbp ); + m_offset += 8; + } + + xPUSH( rbx ); + xPUSH( r12 ); + xPUSH( r13 ); + xPUSH( r14 ); + xPUSH( r15 ); + m_offset += 40; + #else + m_offset += 4; // Call stores the return address (4 bytes) + // Create a new frame if (m_base_frame) { xPUSH( ebp ); xMOV( ebp, esp ); + m_offset += 4; + } else if (m_save_base_pointer) { + xPUSH( ebp ); + m_offset += 4; } // Save the register context xPUSH( edi ); xPUSH( esi ); xPUSH( ebx ); - -#ifdef __GNUC__ - // Realign the stack to 16 byte - if (m_base_frame) { - xSUB( esp, 12); - } -#endif + m_offset += 12; #endif + + ALIGN_STACK(-(16 - m_offset % 16)); } xScopedStackFrame::~xScopedStackFrame() { -#ifdef __x86_64__ - // NEW ABI - pxAssert(0); -#else + ALIGN_STACK(16 - m_offset % 16); -#ifdef __GNUC__ - // Restore the stack (due to the above alignment) - // Potentially it can be restored from ebp +#ifdef __x86_64__ + + // Restore the register context + xPOP( r15 ); + 
xPOP( r14 ); + xPOP( r13 ); + xPOP( r12 ); + xPOP( rbx ); + + // Destroy the frame if (m_base_frame) { - xADD( esp, 12); + xLEAVE(); + } else if (m_save_base_pointer) { + xPOP( rbp ); } -#endif + +#else // Restore the register context xPOP( ebx ); @@ -1148,6 +1124,8 @@ xScopedStackFrame::~xScopedStackFrame() // Destroy the frame if (m_base_frame) { xLEAVE(); + } else if (m_save_base_pointer) { + xPOP( ebp ); } #endif