x86emitter: improve the various ABI wrappers

V2:
* Add various options to handle the different frame layouts
* Basic x86-64 port (untested so likely completely broken)

v3:
Create a templated xImpl_FastCall object

v4:
clang compilation fix
This commit is contained in:
Gregory Hainaut 2015-12-03 20:15:52 +01:00
parent 52b4604d3b
commit 6e66bea152
4 changed files with 204 additions and 98 deletions

View File

@ -68,5 +68,134 @@ struct xImpl_JmpCall
}
};
// Yes, it is awful: because the template code lives in a header, we end up with a nice circular dependency.
extern const xImpl_Mov xMOV;
extern const xImpl_JmpCall xCALL;
// --------------------------------------------------------------------------------------
//  xImpl_FastCall
// --------------------------------------------------------------------------------------
// Function-object that emits a "fastcall"-style call: up to two integer/pointer
// arguments are moved into registers (never onto the stack), then a CALL is emitted.
//   * 32-bit build: a1 -> ecx, a2 -> edx (fastcall convention)
//   * 64-bit build: a1 -> rdi, a2 -> rsi (first two System V AMD64 argument registers)
struct xImpl_FastCall
{
// FIXME: the 64-bit path is mostly a copy/paste of the 32-bit one; it may eventually
// need to push/pop some caller-saved registers around the call, but it should be
// enough to handle the first call.
// NOTE: 'func' is taken as an untyped pointer, so argument types are not checked here.
//
// Helper macros shared by all operator() overloads below; they expand in terms of the
// local names 'func', 'a1' and 'a2' of the enclosing overload.
#ifdef __x86_64__
// Zero arguments: just emit the CALL.
#define XFASTCALL \
xCALL(func);
// One argument, loaded into rdi before the CALL.
#define XFASTCALL1 \
xMOV(rdi, a1); \
xCALL(func);
// Two arguments, loaded into rdi/rsi before the CALL.
#define XFASTCALL2 \
xMOV(rdi, a1); \
xMOV(rsi, a2); \
xCALL(func);
#else
// Zero arguments: just emit the CALL.
#define XFASTCALL \
xCALL(func);
// One argument, loaded into ecx before the CALL.
#define XFASTCALL1 \
xMOV(ecx, a1); \
xCALL(func);
// Two arguments, loaded into ecx/edx before the CALL.
#define XFASTCALL2 \
xMOV(ecx, a1); \
xMOV(edx, a2); \
xCALL(func);
#endif
// Call 'func' with 0-2 register arguments. xEmptyReg arguments are skipped entirely,
// so arguments that are already in the right register cost no extra MOV.
template< typename T > __fi __always_inline_tmpl_fail
void operator()( T* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
{
#ifdef __x86_64__
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#else
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#endif
}
// Call 'func' with an immediate first argument and a register second argument.
template< typename T > __fi __always_inline_tmpl_fail
void operator()( T* func, u32 a1, const xRegister32& a2) const
{
#ifdef __x86_64__
XFASTCALL2;
#else
XFASTCALL2;
#endif
}
// Call 'func' with a single memory-operand argument.
template< typename T > __fi __always_inline_tmpl_fail
void operator()( T* func, const xIndirectVoid& a1) const
{
#ifdef __x86_64__
XFASTCALL1;
#else
XFASTCALL1;
#endif
}
// Call 'func' with two immediate arguments.
template< typename T > __fi __always_inline_tmpl_fail
void operator()( T* func, u32 a1, u32 a2) const
{
#ifdef __x86_64__
XFASTCALL2;
#else
XFASTCALL2;
#endif
}
// Call 'func' with a single immediate argument.
template< typename T > __fi __always_inline_tmpl_fail
void operator()( T* func, u32 a1) const
{
#ifdef __x86_64__
XFASTCALL1;
#else
XFASTCALL1;
#endif
}
// Indirect call (function address read from memory) with 0-2 register arguments.
void operator()(const xIndirect32& func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
{
#ifdef __x86_64__
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#else
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#endif
}
// Keep the helper macros local to this struct definition.
#undef XFASTCALL
#undef XFASTCALL1
#undef XFASTCALL2
};
} // End namespace x86Emitter

View File

@ -93,6 +93,7 @@ namespace x86Emitter
#else
extern const xImpl_JmpCall xCALL;
#endif
extern const xImpl_FastCall xFastCall;
// ------------------------------------------------------------------------
extern const xImpl_CMov
@ -183,19 +184,15 @@ namespace x86Emitter
extern void xINTO();
//////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI
extern void xFastCall(void* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg);
extern void xFastCall(void* func, const xRegisterSSE& a1, const xRegisterSSE& a2);
extern void xFastCall(void* func, u32 a1, u32 a2);
extern void xFastCall(void* func, u32 a1);
extern void xStdCall(void* func, u32 a1);
// Helper object to handle the various functions ABI
// RAII helper for a function stack frame: the constructor emits the prologue
// (optional base frame, register saves, 16-byte stack re-alignment) and the
// destructor emits the matching epilogue.
class xScopedStackFrame
{
bool m_base_frame;          // emit a full ebp/rbp frame (push + mov)
bool m_save_base_pointer;   // only save/restore the base pointer, no frame setup
int m_offset;               // bytes pushed so far, used to compute alignment padding
xScopedStackFrame(bool base_frame); // NOTE(review): private single-arg overload looks like leftover from an older API — confirm it is still needed
public:
xScopedStackFrame(bool base_frame, bool save_base_pointer = false, int offset = 0);
~xScopedStackFrame();
};

View File

@ -42,6 +42,8 @@ void xImpl_JmpCall::operator()( const xIndirect16& src ) const { xOpWrite( 0x6
// Global emitter instances: xJMP emits jumps, xCALL emits calls (the bool selects
// the jump behavior inside xImpl_JmpCall).
const xImpl_JmpCall xJMP = { true };
const xImpl_JmpCall xCALL = { false };
// Stateless fastcall wrapper instance; see xImpl_FastCall for the ABI details.
const xImpl_FastCall xFastCall = { };
void xSmartJump::SetTarget()
{
u8* target = xGetPtr();

View File

@ -1022,123 +1022,99 @@ __emitinline void xRestoreReg( const xRegisterSSE& dest )
xMOVDQA( dest, ptr[&xmm_data[dest.Id*2]] );
}
//////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI
// Emit a fastcall-style call: a1 -> ecx, a2 -> edx, then CALL (32-bit only).
// Empty registers are skipped, so arguments already in place cost nothing.
__emitinline void xFastCall(void *func, const xRegister32& a1, const xRegister32& a2)
{
#ifdef __x86_64__
// NEW ABI: 64-bit path not implemented yet
pxAssert(0);
#else
if (!a1.IsEmpty())
xMOV(ecx, a1);
if (!a2.IsEmpty())
xMOV(edx, a2);
xCALL(func);
#endif
}
// Emit a fastcall with the low 32 bits of two SSE registers as arguments
// (MOVD into ecx/edx), then CALL. 32-bit only.
__emitinline void xFastCall(void *func, const xRegisterSSE& a1, const xRegisterSSE& a2)
{
#ifdef __x86_64__
// NEW ABI: 64-bit path not implemented yet
pxAssert(0);
#else
xMOVD(ecx, a1);
xMOVD(edx, a2);
xCALL(func);
#endif
}
// Emit a fastcall with two immediate arguments: a1 -> ecx, a2 -> edx, then CALL.
// 32-bit only.
__emitinline void xFastCall(void *func, u32 a1, u32 a2)
{
#ifdef __x86_64__
// NEW ABI: 64-bit path not implemented yet
pxAssert(0);
#else
xMOV(ecx, a1);
xMOV(edx, a2);
xCALL(func);
#endif
}
// Emit a fastcall with one immediate argument: a1 -> ecx, then CALL. 32-bit only.
__emitinline void xFastCall(void *func, u32 a1)
{
#ifdef __x86_64__
// NEW ABI: 64-bit path not implemented yet
pxAssert(0);
#else
xMOV(ecx, a1);
xCALL(func);
#endif
}
// Emit a stdcall-style call: the argument is pushed on the stack before the CALL.
// 32-bit only.
__emitinline void xStdCall(void *func, u32 a1)
{
#ifdef __x86_64__
// NEW ABI: 64-bit path not implemented yet
pxAssert(0);
#else
// GCC note: unlike a C call, GCC doesn't require
// strict 16B alignment on a std call
xPUSH(a1);
xCALL(func);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////
// Helper object to handle ABI frame
#ifdef __GNUC__
xScopedStackFrame::xScopedStackFrame(bool base_frame)
#ifdef __x86_64__
// GCC ensures/requires stack to be 16 bytes aligned (but when?)
#define ALIGN_STACK(v) xADD(rsp, v)
#else
// GCC ensures/requires stack to be 16 bytes aligned before the call
// Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes.
// EBP will take 4 bytes if m_base_frame is enabled
#define ALIGN_STACK(v) xADD(esp, v)
#endif
#else
#define ALIGN_STACK(v)
#endif
// Emit the function prologue: optionally create a base frame (push ebp/rbp + mov),
// or only save the base pointer, then save the callee-saved registers, and finally
// pad the stack down to a 16-byte boundary. m_offset accumulates every byte pushed
// so the trailing ALIGN_STACK knows how far off alignment we are.
//   base_frame        - build a full ebp/rbp frame
//   save_base_pointer - only push/pop the base pointer (no frame)
//   offset            - extra bytes already on the stack, counted into the alignment
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{
m_base_frame = base_frame;
m_save_base_pointer = save_base_pointer;
m_offset = offset;
#ifdef __x86_64__
// NEW ABI
pxAssert(0); // 64-bit path still disabled/untested
m_offset += 8; // Call stores the return address (8 bytes on x86-64)
// Note rbp can surely be optimized in 64 bits
if (m_base_frame) {
xPUSH( rbp );
xMOV( rbp, rsp );
m_offset += 8;
} else if (m_save_base_pointer) {
xPUSH( rbp );
m_offset += 8;
}
// Save the callee-saved registers
xPUSH( rbx );
xPUSH( r12 );
xPUSH( r13 );
xPUSH( r14 );
xPUSH( r15 );
m_offset += 40; // 5 registers * 8 bytes
#else
m_offset += 4; // Call stores the return address (4 bytes)
// Create a new frame
if (m_base_frame) {
xPUSH( ebp );
xMOV( ebp, esp );
m_offset += 4;
} else if (m_save_base_pointer) {
xPUSH( ebp );
m_offset += 4;
}
// Save the register context
xPUSH( edi );
xPUSH( esi );
xPUSH( ebx );
#ifdef __GNUC__
// Realign the stack to 16 byte
if (m_base_frame) {
xSUB( esp, 12);
}
#endif
m_offset += 12; // edi/esi/ebx (3 * 4 bytes); NOTE(review): the GCC realignment SUB above is not counted here — confirm intended
#endif
// Pad downwards so the pushed bytes end on a 16-byte boundary.
// NOTE(review): when m_offset is already a multiple of 16 this still subtracts a
// full 16 bytes — harmless but wasteful; confirm intended.
ALIGN_STACK(-(16 - m_offset % 16));
}
xScopedStackFrame::~xScopedStackFrame()
{
#ifdef __x86_64__
// NEW ABI
pxAssert(0);
#else
ALIGN_STACK(16 - m_offset % 16);
#ifdef __GNUC__
// Restore the stack (due to the above alignment)
// Potentially it can be restored from ebp
#ifdef __x86_64__
// Restore the register context
xPOP( r15 );
xPOP( r14 );
xPOP( r13 );
xPOP( r12 );
xPOP( rbx );
// Destroy the frame
if (m_base_frame) {
xADD( esp, 12);
xLEAVE();
} else if (m_save_base_pointer) {
xPOP( rbp );
}
#endif
#else
// Restore the register context
xPOP( ebx );
@ -1148,6 +1124,8 @@ xScopedStackFrame::~xScopedStackFrame()
// Destroy the frame
if (m_base_frame) {
xLEAVE();
} else if (m_save_base_pointer) {
xPOP( ebp );
}
#endif