Fix codegen on x86-64 (#3512)


Part 1 of the changes being worked on in #3451

Makes x86emitter emit the x86-64 machine code you would expect it to

Also adds some unit tests to verify that things are working
tellowkrinkle 2020-08-19 03:19:28 -05:00 committed by GitHub
parent 61f3258b96
commit 4f56db9f18
20 changed files with 784 additions and 310 deletions

.gitmodules

@ -1,3 +1,6 @@
[submodule "3rdparty/xz/xz"]
path = 3rdparty/xz/xz
url = https://github.com/PCSX2/xz.git
[submodule "3rdparty/gtest"]
path = 3rdparty/gtest
url = https://github.com/google/googletest.git

3rdparty/gtest (new submodule)

@ -0,0 +1 @@
Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e


@ -57,6 +57,12 @@ if(EXISTS "${CMAKE_SOURCE_DIR}/plugins")
add_subdirectory(plugins)
endif()
# tests
if(ACTUALLY_ENABLE_TESTS)
add_subdirectory(3rdparty/gtest EXCLUDE_FROM_ALL)
add_subdirectory(tests/ctest)
endif()
#-------------------------------------------------------------------------------
# Install some files to ease package creation


@ -21,6 +21,7 @@
# Misc option
#-------------------------------------------------------------------------------
option(DISABLE_BUILD_DATE "Disable including the binary compile date")
option(ENABLE_TESTS "Enables building the unit tests" ON)
if(DISABLE_BUILD_DATE OR openSUSE)
message(STATUS "Disabling the inclusion of the binary compile date.")
@ -241,7 +242,7 @@ elseif(${PCSX2_TARGET_ARCHITECTURES} MATCHES "x86_64")
set(ARCH_FLAG "-march=native")
endif()
endif()
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1)
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1 -D__M_X86_64=1)
set(_ARCH_64 1)
set(_M_X86 1)
set(_M_X86_64 1)


@ -197,6 +197,14 @@ if(HarfBuzz_FOUND)
include_directories(${HarfBuzz_INCLUDE_DIRS})
endif()
set(ACTUALLY_ENABLE_TESTS ${ENABLE_TESTS})
if(ENABLE_TESTS)
if(NOT EXISTS "${CMAKE_SOURCE_DIR}/3rdparty/gtest/CMakeLists.txt")
message(WARNING "ENABLE_TESTS was on but gtest was not found, unit tests will not be enabled")
set(ACTUALLY_ENABLE_TESTS Off)
endif()
endif()
#----------------------------------------
# Use project-wide include directories
#----------------------------------------


@ -241,7 +241,3 @@ static const int __pagesize = PCSX2_PAGESIZE;
#define __fc __fastcall
#endif
#if defined(__x86_64__) || defined(_M_AMD64)
#define __M_X86_64
#endif


@ -27,8 +27,12 @@ struct xImpl_JmpCall
{
bool isJmp;
void operator()(const xRegisterInt &absreg) const;
void operator()(const xIndirect64orLess &src) const;
void operator()(const xAddressReg &absreg) const;
void operator()(const xIndirectNative &src) const;
#ifdef __M_X86_64
[[deprecated]] // Should move to xIndirectNative
void operator()(const xIndirect32 &absreg) const;
#endif
// Special form for calling functions. This form automatically resolves the
// correct displacement based on the size of the instruction being generated.
@ -41,6 +45,7 @@ struct xImpl_JmpCall
// always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic).
sptr dest = (sptr)func - ((sptr)xGetPtr() + 5);
pxAssertMsg(dest == (s32)dest, "Indirect jump is too far, must use a register!");
xWrite8(0xe8);
xWrite32(dest);
}
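For reference, the rel32 arithmetic above works like this (a minimal sketch; target stands in for func):

    // E8 rel32: the displacement is relative to the end of the 5-byte
    // instruction (1 opcode byte + 4 displacement bytes)
    sptr rel = (sptr)target - ((sptr)xGetPtr() + 5);
    bool reachable = (rel == (s32)rel); // must survive truncation to s32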
@ -56,131 +61,43 @@ struct xImpl_FastCall
// FIXME: the current 64-bit version is mostly a copy/paste; it might require pushing/popping
// some registers. But I think it is enough to handle the first call.
void operator()(void *f, const xRegister32 &a1 = xEmptyReg, const xRegister32 &a2 = xEmptyReg) const;
void operator()(void *f, u32 a1, const xRegister32 &a2) const;
void operator()(void *f, const xIndirect32 &a1) const;
void operator()(void *f, u32 a1, u32 a2) const;
// Type unsafety is nice
#ifdef __M_X86_64
#define XFASTCALL \
xCALL(f);
#define XFASTCALL1 \
xMOV(rdi, a1); \
xCALL(f);
#define XFASTCALL2 \
xMOV(rdi, a1); \
xMOV(rsi, a2); \
xCALL(f);
#else
#define XFASTCALL \
xCALL(f);
#define XFASTCALL1 \
xMOV(ecx, a1); \
xCALL(f);
#define XFASTCALL2 \
xMOV(ecx, a1); \
xMOV(edx, a2); \
xCALL(f);
void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const;
void operator()(void *f, u32 a1, const xRegisterLong &a2) const;
[[deprecated]] // Switch to xIndirect32, as the size of this isn't obvious
#endif
void operator()(void *f, const xIndirectVoid &a1) const;
void operator()(void *f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const
template <typename T>
__fi void operator()(T *func, u32 a1, const xRegisterLong &a2 = xEmptyReg) const
{
#ifdef __M_X86_64
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#else
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#endif
(*this)((void *)func, a1, a2);
}
template <typename T>
__fi void operator()(T *func, u32 a1, const xRegisterLong &a2) const
__fi void operator()(T *func, const xIndirect32 &a1) const
{
void *f = (void *)func;
#ifdef __M_X86_64
XFASTCALL2;
#else
XFASTCALL2;
#endif
}
template <typename T>
__fi void operator()(T *func, const xIndirectVoid &a1) const
{
void *f = (void *)func;
#ifdef __M_X86_64
XFASTCALL1;
#else
XFASTCALL1;
#endif
(*this)((void*)func, a1);
}
template <typename T>
__fi void operator()(T *func, u32 a1, u32 a2) const
{
void *f = (void *)func;
#ifdef __M_X86_64
XFASTCALL2;
#else
XFASTCALL2;
#endif
(*this)((void*)func, a1, a2);
}
template <typename T>
__fi void operator()(T *func, u32 a1) const
{
void *f = (void *)func;
#ifdef __M_X86_64
XFASTCALL1;
#else
XFASTCALL1;
[[deprecated]] // Switch to xIndirectNative
void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const;
#endif
}
void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const
{
#ifdef __M_X86_64
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#else
if (a1.IsEmpty()) {
XFASTCALL;
} else if (a2.IsEmpty()) {
XFASTCALL1;
} else {
XFASTCALL2;
}
#endif
}
#undef XFASTCALL
#undef XFASTCALL1
#undef XFASTCALL2
void operator()(const xIndirectNative &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const;
};
} // End namespace x86Emitter
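A short usage sketch of the surviving xFastCall surface (updateCounter is a hypothetical callee; the implementation picks the argument registers per ABI):

    void updateCounter(u32 v); // hypothetical
    xFastCall(updateCounter, 42); // mov into arg1reg, then call rel32 (or via rax when out of rel32 range)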


@ -33,8 +33,8 @@ struct xImpl_Mov
void operator()(const xRegisterInt &to, const xRegisterInt &from) const;
void operator()(const xIndirectVoid &dest, const xRegisterInt &from) const;
void operator()(const xRegisterInt &to, const xIndirectVoid &src) const;
void operator()(const xIndirect64orLess &dest, int imm) const;
void operator()(const xRegisterInt &to, int imm, bool preserve_flags = false) const;
void operator()(const xIndirect64orLess &dest, sptr imm) const;
void operator()(const xRegisterInt &to, sptr imm, bool preserve_flags = false) const;
#if 0
template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg<T>& immOrReg ) const
@ -70,6 +70,20 @@ struct xImpl_Mov
#endif
};
#ifdef __M_X86_64
// --------------------------------------------------------------------------------------
// xImpl_MovImm64
// --------------------------------------------------------------------------------------
// Mov with 64-bit immediates (only available on 64-bit platforms)
//
struct xImpl_MovImm64
{
xImpl_MovImm64() {} // Satisfy GCC's whims.
void operator()(const xRegister64 &to, s64 imm, bool preserve_flags = false) const;
};
#endif
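The split exists because the ModR/M form of mov (C7 /0) only carries a sign-extended 32-bit immediate; only the B8+r form takes a full 64 bits. A sketch of when each applies, with the encodings taken from the unit tests added in this commit:

    xMOV(rax, 0xffffffff);     // still 32-bit encodable: b8 ff ff ff ff
    xMOV64(rax, 0x1234567890); // needs the 10-byte form: 48 b8 90 78 56 34 12 00 00 00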
// --------------------------------------------------------------------------------------
// xImpl_CMov
// --------------------------------------------------------------------------------------


@ -57,6 +57,9 @@ extern const xImpl_G1Compare xCMP;
// flags.
extern const xImpl_Mov xMOV;
#ifdef __M_X86_64
extern const xImpl_MovImm64 xMOV64;
#endif
extern const xImpl_Test xTEST;
extern const xImpl_Group2 xROL, xROR,


@ -25,12 +25,12 @@ namespace x86Emitter
#define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from)
extern void SimdPrefix(u8 prefix, u16 opcode);
extern void EmitSibMagic(uint regfield, const void *address);
extern void EmitSibMagic(uint regfield, const xIndirectVoid &info);
extern void EmitSibMagic(uint reg1, const xRegisterBase &reg2);
extern void EmitSibMagic(const xRegisterBase &reg1, const xRegisterBase &reg2);
extern void EmitSibMagic(const xRegisterBase &reg1, const void *src);
extern void EmitSibMagic(const xRegisterBase &reg1, const xIndirectVoid &sib);
extern void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset = 0);
extern void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset = 0);
extern void EmitSibMagic(uint reg1, const xRegisterBase &reg2, int = 0);
extern void EmitSibMagic(const xRegisterBase &reg1, const xRegisterBase &reg2, int = 0);
extern void EmitSibMagic(const xRegisterBase &reg1, const void *src, int extraRIPOffset = 0);
extern void EmitSibMagic(const xRegisterBase &reg1, const xIndirectVoid &sib, int extraRIPOffset = 0);
extern void EmitRex(uint regfield, const void *address);
extern void EmitRex(uint regfield, const xIndirectVoid &info);
@ -49,7 +49,7 @@ inline void xWrite(T val)
}
template <typename T1, typename T2>
__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 &param1, const T2 &param2)
__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 &param1, const T2 &param2, int extraRIPOffset = 0)
{
if (prefix != 0)
xWrite8(prefix);
@ -57,7 +57,7 @@ __emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 &param1, const T2 &par
xWrite8(opcode);
EmitSibMagic(param1, param2);
EmitSibMagic(param1, param2, extraRIPOffset);
}
template <typename T1, typename T2>
@ -96,7 +96,13 @@ __emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1 &param1, const T2 &
template <typename T1, typename T2>
__emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1 &param1, const T2 &param2, u8 imm8)
{
xOpWrite0F(prefix, opcode, param1, param2);
if (prefix != 0)
xWrite8(prefix);
EmitRex(param1, param2);
SimdPrefix(0, opcode);
EmitSibMagic(param1, param2, 1);
xWrite8(imm8);
}


@ -181,6 +181,8 @@ enum SSE2_ComparisonType {
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
static const int Sib_EIZ = 4; // same index value as ESP (used in Index field)
static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)
extern void xSetPtr(void *ptr);
extern void xAlignPtr(uint bytes);
@ -210,9 +212,20 @@ public:
xWrite8(0x66);
}
int GetImmSize() const {
switch (GetOperandSize()) {
case 1: return 1;
case 2: return 2;
case 4: return 4;
case 8: return 4; // Only mov's take 64-bit immediates
jNO_DEFAULT
}
return 0;
}
void xWriteImm(int imm) const
{
switch (GetOperandSize()) {
switch (GetImmSize()) {
case 1:
xWrite8(imm);
break;
@ -222,9 +235,6 @@ public:
case 4:
xWrite32(imm);
break;
case 8:
xWrite64(imm);
break;
jNO_DEFAULT
}
@ -315,6 +325,9 @@ public:
{
}
/// Get a non-wide version of the register (for use with e.g. mov, where `mov eax, 3` and `mov rax, 3` are functionally identical but `mov eax, 3` is shorter)
virtual const xRegisterInt& GetNonWide() const = 0;
bool operator==(const xRegisterInt &src) const { return Id == src.Id && (GetOperandSize() == src.GetOperandSize()); }
bool operator!=(const xRegisterInt &src) const { return !operator==(src); }
};
@ -336,7 +349,8 @@ public:
{
}
virtual uint GetOperandSize() const { return 1; }
virtual uint GetOperandSize() const override { return 1; }
virtual const xRegisterInt& GetNonWide() const override { return *this; }
bool operator==(const xRegister8 &src) const { return Id == src.Id; }
bool operator!=(const xRegister8 &src) const { return Id != src.Id; }
@ -356,7 +370,8 @@ public:
{
}
virtual uint GetOperandSize() const { return 2; }
virtual uint GetOperandSize() const override { return 2; }
virtual const xRegisterInt& GetNonWide() const override { return *this; }
bool operator==(const xRegister16 &src) const { return this->Id == src.Id; }
bool operator!=(const xRegister16 &src) const { return this->Id != src.Id; }
@ -376,7 +391,8 @@ public:
{
}
virtual uint GetOperandSize() const { return 4; }
virtual uint GetOperandSize() const override { return 4; }
virtual const xRegisterInt& GetNonWide() const override { return *this; }
bool operator==(const xRegister32 &src) const { return this->Id == src.Id; }
bool operator!=(const xRegister32 &src) const { return this->Id != src.Id; }
@ -386,17 +402,21 @@ class xRegister64 : public xRegisterInt
{
typedef xRegisterInt _parent;
xRegister32 m_nonWide;
public:
xRegister64()
: _parent()
, m_nonWide()
{
}
explicit xRegister64(int regId)
: _parent(regId)
, m_nonWide(regId)
{
}
virtual uint GetOperandSize() const { return 8; }
virtual uint GetOperandSize() const override { return 8; }
virtual const xRegisterInt& GetNonWide() const override { return m_nonWide; }
bool operator==(const xRegister64 &src) const { return this->Id == src.Id; }
bool operator!=(const xRegister64 &src) const { return this->Id != src.Id; }
@ -498,9 +518,9 @@ public:
bool IsStackPointer() const { return Id == 4; }
xAddressVoid operator+(const xAddressReg &right) const;
xAddressVoid operator+(s32 right) const;
xAddressVoid operator+(sptr right) const;
xAddressVoid operator+(const void *right) const;
xAddressVoid operator-(s32 right) const;
xAddressVoid operator-(sptr right) const;
xAddressVoid operator-(const void *right) const;
xAddressVoid operator*(int factor) const;
xAddressVoid operator<<(u32 shift) const;
@ -522,6 +542,11 @@ struct xRegisterEmpty
return xRegister16(xRegId_Empty);
}
operator xRegister32() const
{
return xRegister32(xRegId_Empty);
}
operator xRegisterSSE() const
{
return xRegisterSSE(xRegId_Empty);
@ -627,6 +652,13 @@ extern const xAddressReg
eax, ebx, ecx, edx,
esi, edi, ebp, esp;
// Temporary registers to aid the move to x86-64
extern const xRegister32
eaxd, ebxd, ecxd, edxd,
esid, edid, ebpd, espd,
r8d, r9d, r10d, r11d,
r12d, r13d, r14d, r15d;
extern const xRegister16
ax, bx, cx, dx,
si, di, bp, sp;
@ -635,6 +667,19 @@ extern const xRegister8
al, dl, bl,
ah, ch, dh, bh;
extern const xAddressReg
arg1reg, arg2reg,
arg3reg, arg4reg,
calleeSavedReg1,
calleeSavedReg2;
extern const xRegister32
arg1regd, arg2regd,
calleeSavedReg1d,
calleeSavedReg2d;
// clang-format on
extern const xRegisterCL cl; // I'm special!
@ -661,19 +706,19 @@ public:
xAddressReg Base; // base register (no scale)
xAddressReg Index; // index reg gets multiplied by the scale
int Factor; // scale applied to the index register, in factor form (not a shift!)
s32 Displacement; // address displacement // 4B max even on 64 bits
sptr Displacement; // address displacement; 4B max even on 64 bits, but kept wide for assertions
public:
xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, s32 displacement = 0);
xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0);
xAddressVoid(const xAddressReg &index, int displacement = 0);
xAddressVoid(const xAddressReg &index, sptr displacement = 0);
explicit xAddressVoid(const void *displacement);
explicit xAddressVoid(s32 displacement = 0);
explicit xAddressVoid(sptr displacement = 0);
public:
bool IsByteSizeDisp() const { return is_s8(Displacement); }
xAddressVoid &Add(s32 imm)
xAddressVoid &Add(sptr imm)
{
Displacement += imm;
return *this;
@ -684,13 +729,13 @@ public:
__fi xAddressVoid operator+(const xAddressReg &right) const { return xAddressVoid(*this).Add(right); }
__fi xAddressVoid operator+(const xAddressVoid &right) const { return xAddressVoid(*this).Add(right); }
__fi xAddressVoid operator+(s32 imm) const { return xAddressVoid(*this).Add(imm); }
__fi xAddressVoid operator-(s32 imm) const { return xAddressVoid(*this).Add(-imm); }
__fi xAddressVoid operator+(sptr imm) const { return xAddressVoid(*this).Add(imm); }
__fi xAddressVoid operator-(sptr imm) const { return xAddressVoid(*this).Add(-imm); }
__fi xAddressVoid operator+(const void *addr) const { return xAddressVoid(*this).Add((uptr)addr); }
__fi void operator+=(const xAddressReg &right) { Add(right); }
__fi void operator+=(s32 imm) { Add(imm); }
__fi void operator-=(s32 imm) { Add(-imm); }
__fi void operator+=(sptr imm) { Add(imm); }
__fi void operator-=(sptr imm) { Add(-imm); }
};
// --------------------------------------------------------------------------------------
@ -702,7 +747,7 @@ class xAddressInfo : public xAddressVoid
typedef xAddressVoid _parent;
public:
xAddressInfo(const xAddressReg &base, const xAddressReg &index, int factor = 1, s32 displacement = 0)
xAddressInfo(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0)
: _parent(base, index, factor, displacement)
{
}
@ -710,17 +755,17 @@ public:
/*xAddressInfo( const xAddressVoid& src )
: _parent( src ) {}*/
explicit xAddressInfo(const xAddressReg &index, int displacement = 0)
explicit xAddressInfo(const xAddressReg &index, sptr displacement = 0)
: _parent(index, displacement)
{
}
explicit xAddressInfo(s32 displacement = 0)
explicit xAddressInfo(sptr displacement = 0)
: _parent(displacement)
{
}
static xAddressInfo<BaseType> FromIndexReg(const xAddressReg &index, int scale = 0, s32 displacement = 0);
static xAddressInfo<BaseType> FromIndexReg(const xAddressReg &index, int scale = 0, sptr displacement = 0);
public:
using _parent::operator+=;
@ -728,7 +773,7 @@ public:
bool IsByteSizeDisp() const { return is_s8(Displacement); }
xAddressInfo<BaseType> &Add(s32 imm)
xAddressInfo<BaseType> &Add(sptr imm)
{
Displacement += imm;
return *this;
@ -747,8 +792,8 @@ public:
__fi xAddressInfo<BaseType> operator+(const xAddressReg &right) const { return xAddressInfo(*this).Add(right); }
__fi xAddressInfo<BaseType> operator+(const xAddressInfo<BaseType> &right) const { return xAddressInfo(*this).Add(right); }
__fi xAddressInfo<BaseType> operator+(s32 imm) const { return xAddressInfo(*this).Add(imm); }
__fi xAddressInfo<BaseType> operator-(s32 imm) const { return xAddressInfo(*this).Add(-imm); }
__fi xAddressInfo<BaseType> operator+(sptr imm) const { return xAddressInfo(*this).Add(imm); }
__fi xAddressInfo<BaseType> operator-(sptr imm) const { return xAddressInfo(*this).Add(-imm); }
__fi xAddressInfo<BaseType> operator+(const void *addr) const { return xAddressInfo(*this).Add((uptr)addr); }
__fi void operator+=(const xAddressInfo<BaseType> &right) { Add(right); }
@ -765,7 +810,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressVoid &right)
return right + addr;
}
static __fi xAddressVoid operator+(s32 addr, const xAddressVoid &right)
static __fi xAddressVoid operator+(sptr addr, const xAddressVoid &right)
{
return right + addr;
}
@ -778,7 +823,7 @@ static __fi xAddressInfo<OperandType> operator+(const void *addr, const xAddress
}
template <typename OperandType>
static __fi xAddressInfo<OperandType> operator+(s32 addr, const xAddressInfo<OperandType> &right)
static __fi xAddressInfo<OperandType> operator+(sptr addr, const xAddressInfo<OperandType> &right)
{
return right + addr;
}
@ -836,29 +881,31 @@ public:
xAddressReg Base; // base register (no scale)
xAddressReg Index; // index reg gets multiplied by the scale
uint Scale; // scale applied to the index register, in scale/shift form
s32 Displacement; // offset applied to the Base/Index registers.
sptr Displacement; // offset applied to the Base/Index registers.
// Displacement is 8/32 bits even on x86_64
// However we need the whole pointer to calculate rip-relative offsets
public:
explicit xIndirectVoid(s32 disp);
explicit xIndirectVoid(sptr disp);
explicit xIndirectVoid(const xAddressVoid &src);
xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0);
xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0);
virtual uint GetOperandSize() const;
xIndirectVoid &Add(s32 imm);
xIndirectVoid &Add(sptr imm);
bool IsByteSizeDisp() const { return is_s8(Displacement); }
bool IsMem() const { return true; }
bool IsReg() const { return false; }
bool IsExtended() const { return false; } // Nonsense, but it eases templates
bool IsWide() const { return GetOperandSize() == 8; }
operator xAddressVoid()
{
return xAddressVoid(Base, Index, Scale, Displacement);
}
__fi xIndirectVoid operator+(const s32 imm) const { return xIndirectVoid(*this).Add(imm); }
__fi xIndirectVoid operator-(const s32 imm) const { return xIndirectVoid(*this).Add(-imm); }
__fi xIndirectVoid operator+(const sptr imm) const { return xIndirectVoid(*this).Add(imm); }
__fi xIndirectVoid operator-(const sptr imm) const { return xIndirectVoid(*this).Add(-imm); }
protected:
void Reduce();
@ -870,7 +917,7 @@ class xIndirect : public xIndirectVoid
typedef xIndirectVoid _parent;
public:
explicit xIndirect(s32 disp)
explicit xIndirect(sptr disp)
: _parent(disp)
{
}
@ -878,21 +925,21 @@ public:
: _parent(src)
{
}
xIndirect(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0)
xIndirect(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0)
: _parent(base, index, scale, displacement)
{
}
virtual uint GetOperandSize() const { return sizeof(OperandType); }
xIndirect<OperandType> &Add(s32 imm)
xIndirect<OperandType> &Add(sptr imm)
{
Displacement += imm;
return *this;
}
__fi xIndirect<OperandType> operator+(const s32 imm) const { return xIndirect(*this).Add(imm); }
__fi xIndirect<OperandType> operator-(const s32 imm) const { return xIndirect(*this).Add(-imm); }
__fi xIndirect<OperandType> operator+(const sptr imm) const { return xIndirect(*this).Add(imm); }
__fi xIndirect<OperandType> operator-(const sptr imm) const { return xIndirect(*this).Add(-imm); }
bool operator==(const xIndirect<OperandType> &src) const
{
@ -914,6 +961,11 @@ typedef xIndirect<u64> xIndirect64;
typedef xIndirect<u32> xIndirect32;
typedef xIndirect<u16> xIndirect16;
typedef xIndirect<u8> xIndirect8;
#ifdef __M_X86_64
typedef xIndirect<u64> xIndirectNative;
#else
typedef xIndirect<u32> xIndirectNative;
#endif
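ptrNative sizes the access to the host pointer width, so pointer loads and stores can be written once for both targets; e.g. (encoding from the tests added in this commit):

    xMOV(rax, ptrNative[rcx]); // x86-64: 48 8b 01 (64-bit load); on 32-bit builds the same indexer yields a 32-bit access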
// --------------------------------------------------------------------------------------
// xIndirect64orLess - base class 64, 32, 16, and 8 bit operand types
@ -952,11 +1004,11 @@ public:
protected:
//xIndirect64orLess( const xAddressVoid& src ) : _parent( src ) {}
explicit xIndirect64orLess(s32 disp)
explicit xIndirect64orLess(sptr disp)
: _parent(disp)
{
}
xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0)
xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0)
: _parent(base, index, scale, displacement)
{
}
@ -995,6 +1047,7 @@ public:
// ptr[] - use this form for instructions which can resolve the address operand size from
// the other register operand sizes.
extern const xAddressIndexer<xIndirectVoid> ptr;
extern const xAddressIndexer<xIndirectNative> ptrNative;
extern const xAddressIndexer<xIndirect128> ptr128;
extern const xAddressIndexer<xIndirect64> ptr64;
extern const xAddressIndexer<xIndirect32> ptr32;
@ -1165,7 +1218,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressReg &reg)
return reg + (sptr)addr;
}
static __fi xAddressVoid operator+(s32 addr, const xAddressReg &reg)
static __fi xAddressVoid operator+(sptr addr, const xAddressReg &reg)
{
return reg + (sptr)addr;
}


@ -50,7 +50,7 @@ static void _g1_IndirectImm(G1Type InstType, const xIndirect64orLess &sibdest, i
xWrite<s8>(imm);
} else {
u8 opcode = is_s8(imm) ? 0x83 : 0x81;
xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest);
xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest, is_s8(imm) ? 1 : sibdest.GetImmSize());
if (is_s8(imm))
xWrite<s8>(imm);
@ -156,7 +156,7 @@ void xImpl_Group2::operator()(const xIndirect64orLess &sibdest, u8 imm) const
// special encoding of 1's
xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xd0 : 0xd1, InstType, sibdest);
} else {
xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest);
xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest, 1);
xWrite8(imm);
}
}
@ -195,7 +195,7 @@ static void _imul_ImmStyle(const xRegisterInt &param1, const SrcType &param2, in
{
pxAssert(param1.GetOperandSize() == param2.GetOperandSize());
xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2);
xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2, is_s8(imm) ? 1 : param1.GetImmSize());
if (is_s8(imm))
xWrite8((u8)imm);


@ -34,12 +34,110 @@
namespace x86Emitter
{
void xImpl_JmpCall::operator()(const xRegisterInt &absreg) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg); }
void xImpl_JmpCall::operator()(const xIndirect64orLess &src) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, src); }
void xImpl_JmpCall::operator()(const xAddressReg &absreg) const {
// Jumps are always wide and don't need the rex.W
xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg.GetNonWide());
}
void xImpl_JmpCall::operator()(const xIndirectNative &src) const {
// Jumps are always wide and don't need the rex.W
EmitRex(0, xIndirect32(src.Base, src.Index, 1, 0));
xWrite8(0xff);
EmitSibMagic(isJmp ? 4 : 2, src);
}
#ifdef __M_X86_64
void xImpl_JmpCall::operator()(const xIndirect32 &absreg) const {
xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg);
}
#endif
const xImpl_JmpCall xJMP = {true};
const xImpl_JmpCall xCALL = {false};
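As a sanity check of the "always wide, no REX.W" rule, the expected encodings (mirrored in the unit tests below):

    xJMP(rdi);            // ff e7    - no REX.W even though the target register is 64-bit
    xJMP(r8);             // 41 ff e0 - REX.B selects r8; W stays clear
    xJMP(ptrNative[rax]); // ff 20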
template <typename Reg1, typename Reg2>
void prepareRegsForFastcall(const Reg1 &a1, const Reg2 &a2) {
if (a1.IsEmpty()) return;
// Make sure we don't mess up if someone tries to fastcall with a1 in arg2reg and a2 in arg1reg
if (a2.Id != arg1reg.Id) {
xMOV(Reg1(arg1reg.Id), a1);
if (!a2.IsEmpty()) {
xMOV(Reg2(arg2reg.Id), a2);
}
} else if (a1.Id != arg2reg.Id) {
xMOV(Reg2(arg2reg.Id), a2);
xMOV(Reg1(arg1reg.Id), a1);
} else {
xPUSH(a1);
xMOV(Reg2(arg2reg.Id), a2);
xPOP(Reg1(arg1reg.Id));
}
}
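A minimal sketch of the hazard the last branch handles, assuming the SysV register assignment defined later in this commit (arg1reg = rdi, arg2reg = rsi): if a1 already lives in rsi and a2 in rdi, a naive mov rdi, rsi would clobber a2 before it is read, so a1 is spilled through the stack instead:

    xFastCall((void*)someFunc, esid, edid); // hypothetical worst case
    // emits: push rsi ; mov esi, edi ; pop rdi ; then the call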
void xImpl_FastCall::operator()(void *f, const xRegister32 &a1, const xRegister32 &a2) const {
prepareRegsForFastcall(a1, a2);
uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
if ((sptr)disp == (s32)disp) {
xCALL(f);
} else {
xMOV(rax, ptrNative[f]);
xCALL(rax);
}
}
#ifdef __M_X86_64
void xImpl_FastCall::operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2) const {
prepareRegsForFastcall(a1, a2);
uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
if ((sptr)disp == (s32)disp) {
xCALL(f);
} else {
xMOV(rax, ptrNative[f]);
xCALL(rax);
}
}
void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const {
if (!a2.IsEmpty()) { xMOV(arg2reg, a2); }
xMOV(arg1reg, a1);
(*this)(f, arg1reg, arg2reg);
}
#endif
void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const {
if (!a2.IsEmpty()) { xMOV(arg2regd, a2); }
xMOV(arg1regd, a1);
(*this)(f, arg1regd, arg2regd);
}
void xImpl_FastCall::operator()(void *f, const xIndirect32 &a1) const {
xMOV(arg1regd, a1);
(*this)(f, arg1regd);
}
void xImpl_FastCall::operator()(void *f, const xIndirectVoid &a1) const {
xMOV(arg1regd, a1);
(*this)(f, arg1regd);
}
void xImpl_FastCall::operator()(void *f, u32 a1, u32 a2) const {
xMOV(arg1regd, a1);
xMOV(arg2regd, a2);
(*this)(f, arg1regd, arg2regd);
}
#ifdef __M_X86_64
void xImpl_FastCall::operator()(const xIndirect32 &f, const xRegisterLong &a1, const xRegisterLong &a2) const {
prepareRegsForFastcall(a1, a2);
xCALL(f);
}
#endif
void xImpl_FastCall::operator()(const xIndirectNative &f, const xRegisterLong &a1, const xRegisterLong &a2) const {
prepareRegsForFastcall(a1, a2);
xCALL(f);
}
const xImpl_FastCall xFastCall = {};
void xSmartJump::SetTarget()


@ -56,15 +56,15 @@ void xImpl_Mov::operator()(const xIndirectVoid &dest, const xRegisterInt &from)
// mov eax has a special form when writing directly to a DISP32 address
// (sans any register index/base registers).
#ifndef __M_X86_64
// Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr
if (from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty()) {
// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally
#ifdef __M_X86_64
pxAssert(0);
#endif
xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from.Id, dest);
xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from, dest);
xWrite32(dest.Displacement);
} else {
xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from.Id, dest);
} else
#endif
{
xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from, dest);
}
}
@ -73,40 +73,91 @@ void xImpl_Mov::operator()(const xRegisterInt &to, const xIndirectVoid &src) con
// mov eax has a special form when reading directly from a DISP32 address
// (sans any register index/base registers).
#ifndef __M_X86_64
// Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr
if (to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty()) {
// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally
#ifdef __M_X86_64
pxAssert(0);
#endif
xOpAccWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xa0 : 0xa1, to, src);
xWrite32(src.Displacement);
} else {
} else
#endif
{
xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0x8a : 0x8b, to, src);
}
}
void xImpl_Mov::operator()(const xIndirect64orLess &dest, int imm) const
void xImpl_Mov::operator()(const xIndirect64orLess &dest, sptr imm) const
{
xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest);
switch (dest.GetOperandSize()) {
case 1:
pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!");
break;
case 2:
pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!");
break;
case 4:
pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!");
break;
case 8:
pxAssertMsg(imm == (s32)imm, "Immediate won't fit in immediate slot, go through a register!");
break;
default:
pxAssertMsg(0, "Bad indirect size!");
}
xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest, dest.GetImmSize());
dest.xWriteImm(imm);
}
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
void xImpl_Mov::operator()(const xRegisterInt &to, int imm, bool preserve_flags) const
void xImpl_Mov::operator()(const xRegisterInt &to, sptr imm, bool preserve_flags) const
{
if (!preserve_flags && (imm == 0))
_g1_EmitOp(G1Type_XOR, to, to);
else {
switch (to.GetOperandSize()) {
case 1:
pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!");
break;
case 2:
pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!");
break;
case 4:
pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!");
break;
case 8:
pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit in immediate slot, use mov64 or lea!");
break;
default:
pxAssertMsg(0, "Bad indirect size!");
}
const xRegisterInt& to_ = to.GetNonWide();
if (!preserve_flags && (imm == 0)) {
_g1_EmitOp(G1Type_XOR, to_, to_);
} else if (imm == (u32)imm || !to.IsWide()) {
// Note: MOV does not have (reg16/32,imm8) forms.
u8 opcode = (to.Is8BitOp() ? 0xb0 : 0xb8) | to.Id;
xOpAccWrite(to.GetPrefix16(), opcode, 0, to);
u8 opcode = (to_.Is8BitOp() ? 0xb0 : 0xb8) | to_.Id;
xOpAccWrite(to_.GetPrefix16(), opcode, 0, to_);
to_.xWriteImm(imm);
} else {
xOpWrite(to.GetPrefix16(), 0xc7, 0, to);
to.xWriteImm(imm);
}
}
const xImpl_Mov xMOV;
#ifdef __M_X86_64
void xImpl_MovImm64::operator()(const xRegister64& to, s64 imm, bool preserve_flags) const
{
if (imm == (u32)imm || imm == (s32)imm) {
xMOV(to, imm, preserve_flags);
} else {
u8 opcode = 0xb8 | to.Id;
xOpAccWrite(to.GetPrefix16(), opcode, 0, to);
xWrite64(imm);
}
}
const xImpl_MovImm64 xMOV64;
#endif
// --------------------------------------------------------------------------------------
// CMOVcc
// --------------------------------------------------------------------------------------


@ -97,6 +97,7 @@ __fi void xWrite64(u64 val)
// objects be initialized even though they have no actual variable members).
const xAddressIndexer<xIndirectVoid> ptr = {};
const xAddressIndexer<xIndirectNative> ptrNative = {};
const xAddressIndexer<xIndirect128> ptr128 = {};
const xAddressIndexer<xIndirect64> ptr64 = {};
const xAddressIndexer<xIndirect32> ptr32 = {};
@ -135,6 +136,16 @@ const xAddressReg
esp(4), ebp(5),
esi(6), edi(7);
const xRegister32
eaxd(0), ebxd(3),
ecxd(1), edxd(2),
espd(4), ebpd(5),
esid(6), edid(7),
r8d(8), r9d(9),
r10d(10), r11d(11),
r12d(12), r13d(13),
r14d(14), r15d(15);
const xRegister16
ax(0), bx(3),
cx(1), dx(2),
@ -147,6 +158,41 @@ const xRegister8
ah(4), ch(5),
dh(6), bh(7);
#if defined(_WIN32) || !defined(__M_X86_64)
const xAddressReg
arg1reg = rcx,
arg2reg = rdx,
#ifdef __M_X86_64
arg3reg = r8,
arg4reg = r9,
#else
arg3reg = xRegisterEmpty(),
arg4reg = xRegisterEmpty(),
#endif
calleeSavedReg1 = rdi,
calleeSavedReg2 = rsi;
const xRegister32
arg1regd = ecxd,
arg2regd = edxd,
calleeSavedReg1d = edid,
calleeSavedReg2d = esid;
#else
const xAddressReg
arg1reg = rdi,
arg2reg = rsi,
arg3reg = rdx,
arg4reg = rcx,
calleeSavedReg1 = r12,
calleeSavedReg2 = r13;
const xRegister32
arg1regd = edid,
arg2regd = esid,
calleeSavedReg1d = r12d,
calleeSavedReg2d = r13d;
#endif
// clang-format on
const xRegisterCL cl;
@ -250,16 +296,22 @@ static __fi void SibSB(u32 ss, u32 index, u32 base)
xWrite8((ss << 6) | (index << 3) | base);
}
void EmitSibMagic(uint regfield, const void *address)
void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset)
{
ModRM(0, regfield, ModRm_UseDisp32);
// SIB encoding only supports 32bit offsets, even on x86_64
// We must make sure that the displacement is within the 32bit range
// Else we will fail out in a spectacular fashion
sptr displacement = (sptr)address;
#ifdef __M_X86_64
pxAssertDev(displacement >= -0x80000000LL && displacement < 0x80000000LL, "SIB target is too far away, needs an indirect register");
#ifndef __M_X86_64
ModRM(0, regfield, ModRm_UseDisp32);
#else
sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
if (ripRelative == (s32)ripRelative) {
ModRM(0, regfield, ModRm_UseDisp32);
displacement = ripRelative;
} else {
pxAssertDev(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
ModRM(0, regfield, ModRm_UseSib);
SibSB(0, Sib_EIZ, Sib_UseDisp32);
}
#endif
xWrite<s32>((s32)displacement);
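Worked example of extraRIPOffset, using a test added below: xMOV(ptr32[base], 0x12) assembled at base encodes as c7 05 f6 ff ff ff 12 00 00 00. The disp32 is f6 ff ff ff = -10, i.e. minus the whole 10-byte instruction, because RIP-relative displacements are measured from the end of the instruction and the 4 trailing immediate bytes are accounted for by extraRIPOffset:

    // x86Ptr here points just past the opcode, so:
    // disp = target - (x86Ptr + 1 /*ModRM*/ + 4 /*disp32*/ + extraRIPOffset /*trailing imm bytes*/)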
@ -293,7 +345,7 @@ static __fi bool NeedsSibMagic(const xIndirectVoid &info)
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
void EmitSibMagic(uint regfield, const xIndirectVoid &info)
void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
{
// 3 bits also on x86_64 (so max is 8)
// We might need to mask it on x86_64
@ -302,6 +354,8 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info)
((info.IsByteSizeDisp()) ? 1 : 2);
pxAssert(!info.Base.IsEmpty() || !info.Index.IsEmpty() || displacement_size == 2);
// Displacement is only 64 bits for rip-relative addressing
pxAssert(info.Displacement == (s32)info.Displacement || (info.Base.IsEmpty() && info.Index.IsEmpty()));
if (!NeedsSibMagic(info)) {
// Use ModRm-only encoding, with the rm field holding an index/base register, if
@ -310,13 +364,13 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info)
// encoded *with* a displacement of 0, if it would otherwise not have one).
if (info.Index.IsEmpty()) {
EmitSibMagic(regfield, (void *)info.Displacement);
EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset);
return;
} else {
if (info.Index == ebp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, info.Index.Id);
ModRM(displacement_size, regfield, info.Index.Id & 7);
}
} else {
// In order to encode "just" index*scale (and no base), we have to encode
@ -327,7 +381,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info)
if (info.Base.IsEmpty()) {
ModRM(0, regfield, ModRm_UseSib);
SibSB(info.Scale, info.Index.Id, ModRm_UseDisp32);
SibSB(info.Scale, info.Index.Id, Sib_UseDisp32);
xWrite<s32>(info.Displacement);
return;
} else {
@ -335,7 +389,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, ModRm_UseSib);
SibSB(info.Scale, info.Index.Id, info.Base.Id);
SibSB(info.Scale, info.Index.Id & 7, info.Base.Id & 7);
}
}
@ -349,24 +403,24 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info)
// Writes a ModRM byte for "Direct" register access forms, which is used for all
// instructions taking a form of [reg,reg].
void EmitSibMagic(uint reg1, const xRegisterBase &reg2)
void EmitSibMagic(uint reg1, const xRegisterBase &reg2, int)
{
xWrite8((Mod_Direct << 6) | (reg1 << 3) | reg2.Id);
xWrite8((Mod_Direct << 6) | (reg1 << 3) | (reg2.Id & 7));
}
void EmitSibMagic(const xRegisterBase &reg1, const xRegisterBase &reg2)
void EmitSibMagic(const xRegisterBase &reg1, const xRegisterBase &reg2, int)
{
xWrite8((Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id);
xWrite8((Mod_Direct << 6) | ((reg1.Id & 7) << 3) | (reg2.Id & 7));
}
void EmitSibMagic(const xRegisterBase &reg1, const void *src)
void EmitSibMagic(const xRegisterBase &reg1, const void *src, int extraRIPOffset)
{
EmitSibMagic(reg1.Id, src);
EmitSibMagic(reg1.Id & 7, src, extraRIPOffset);
}
void EmitSibMagic(const xRegisterBase &reg1, const xIndirectVoid &sib)
void EmitSibMagic(const xRegisterBase &reg1, const xIndirectVoid &sib, int extraRIPOffset)
{
EmitSibMagic(reg1.Id, sib);
EmitSibMagic(reg1.Id & 7, sib, extraRIPOffset);
}
//////////////////////////////////////////////////////////////////////////////////////////
@ -391,10 +445,14 @@ void EmitRex(uint regfield, const void *address)
void EmitRex(uint regfield, const xIndirectVoid &info)
{
bool w = info.Base.IsWide();
bool w = info.IsWide();
bool r = false;
bool x = false;
bool b = info.IsExtended();
bool x = info.Index.IsExtended();
bool b = info.Base.IsExtended();
if (!NeedsSibMagic(info)) {
b = x;
x = false;
}
EmitRex(w, r, x, b);
}
@ -432,6 +490,33 @@ void EmitRex(const xRegisterBase &reg1, const xIndirectVoid &sib)
bool r = reg1.IsExtended();
bool x = sib.Index.IsExtended();
bool b = sib.Base.IsExtended();
if (!NeedsSibMagic(sib)) {
b = x;
x = false;
}
EmitRex(w, r, x, b);
}
// For use by instructions that are implicitly wide
void EmitRexImplicitlyWide(const xRegisterBase &reg)
{
bool w = false;
bool r = false;
bool x = false;
bool b = reg.IsExtended();
EmitRex(w, r, x, b);
}
void EmitRexImplicitlyWide(const xIndirectVoid &sib)
{
bool w = false;
bool r = false;
bool x = sib.Index.IsExtended();
bool b = sib.Base.IsExtended();
if (!NeedsSibMagic(sib)) {
b = x;
x = false;
}
EmitRex(w, r, x, b);
}
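For reference (standard x86-64 encoding, not specific to this emitter), the REX byte assembled from these four flags is:

    // REX = 0x40 | (W << 3) | (R << 2) | (X << 1) | B
    //   W: 64-bit operand size      R: extends ModRM.reg
    //   X: extends SIB.index        B: extends ModRM.rm / SIB.base
    // e.g. mov r8, rax => 0x49 (W|B), then 89 c0 - the test below expects "49 89 c0"

which is also why the Id & 7 masks added to EmitSibMagic above are safe: the dropped bit reappears here as R, X, or B.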
@ -459,7 +544,7 @@ __emitinline u8 *xGetPtr()
__emitinline void xAlignPtr(uint bytes)
{
// forward align
x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(bytes - 1));
x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(uptr)(bytes - 1));
}
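The new (uptr) cast matters on 64-bit hosts: previously ~(bytes - 1) was computed in 32 bits and then zero-extended, clearing the upper half of the pointer. A minimal sketch:

    uptr p = 0x00007f1234567899;
    p & ~(16u - 1);      // mask zero-extends to 0x00000000fffffff0 - high bits lost
    p & ~(uptr)(16 - 1); // 0xfffffffffffffff0 - correctly aligns down to ...7890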
// Performs best-case alignment for the target CPU, for use prior to starting a new
@ -506,7 +591,7 @@ xAddressVoid xAddressReg::operator+(const xAddressReg &right) const
return xAddressVoid(*this, right);
}
xAddressVoid xAddressReg::operator+(s32 right) const
xAddressVoid xAddressReg::operator+(sptr right) const
{
pxAssertMsg(Id != -1, "Uninitialized x86 register.");
return xAddressVoid(*this, right);
@ -518,7 +603,7 @@ xAddressVoid xAddressReg::operator+(const void *right) const
return xAddressVoid(*this, (sptr)right);
}
xAddressVoid xAddressReg::operator-(s32 right) const
xAddressVoid xAddressReg::operator-(sptr right) const
{
pxAssertMsg(Id != -1, "Uninitialized x86 register.");
return xAddressVoid(*this, -right);
@ -547,7 +632,7 @@ xAddressVoid xAddressReg::operator<<(u32 shift) const
// xAddressVoid (method implementations)
// --------------------------------------------------------------------------------------
xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, s32 displacement)
xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, sptr displacement)
{
Base = base;
Index = index;
@ -558,7 +643,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, in
pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register.");
}
xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement)
xAddressVoid::xAddressVoid(const xAddressReg &index, sptr displacement)
{
Base = xEmptyReg;
Index = index;
@ -568,7 +653,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement)
pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register.");
}
xAddressVoid::xAddressVoid(s32 displacement)
xAddressVoid::xAddressVoid(sptr displacement)
{
Base = xEmptyReg;
Index = xEmptyReg;
@ -581,12 +666,7 @@ xAddressVoid::xAddressVoid(const void *displacement)
Base = xEmptyReg;
Index = xEmptyReg;
Factor = 0;
#ifdef __M_X86_64
pxAssert(0);
//Displacement = (s32)displacement;
#else
Displacement = (s32)displacement;
#endif
Displacement = (sptr)displacement;
}
xAddressVoid &xAddressVoid::Add(const xAddressReg &src)
@ -643,7 +723,7 @@ xIndirectVoid::xIndirectVoid(const xAddressVoid &src)
Reduce();
}
xIndirectVoid::xIndirectVoid(s32 disp)
xIndirectVoid::xIndirectVoid(sptr disp)
{
Base = xEmptyReg;
Index = xEmptyReg;
@ -653,7 +733,7 @@ xIndirectVoid::xIndirectVoid(s32 disp)
// no reduction necessary :D
}
xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, s32 displacement)
xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, sptr displacement)
{
Base = base;
Index = index;
@ -754,7 +834,7 @@ uint xIndirectVoid::GetOperandSize() const
return 0;
}
xIndirectVoid &xIndirectVoid::Add(s32 imm)
xIndirectVoid &xIndirectVoid::Add(sptr imm)
{
Displacement += imm;
return *this;
@ -775,7 +855,11 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
// See EmitSibMagic for commenting on SIB encoding.
if (!NeedsSibMagic(src)) {
// We should allow native-sized addressing regs (e.g. lea eax, [rax])
const xRegisterInt& sizeMatchedIndex = to.IsWide() ? src.Index : src.Index.GetNonWide();
const xRegisterInt& sizeMatchedBase = to.IsWide() ? src.Base : src.Base.GetNonWide();
if (!NeedsSibMagic(src) && src.Displacement == (s32)src.Displacement) {
// LEA Land: means we have either 1-register encoding or just an offset.
// offset is encodable as an immediate MOV, and a register is encodable
// as a register MOV.
@ -783,24 +867,17 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
if (src.Index.IsEmpty()) {
xMOV(to, src.Displacement);
return;
} else if (displacement_size == 0) {
_xMovRtoR(to, src.Index);
}
else if (displacement_size == 0) {
_xMovRtoR(to, sizeMatchedIndex);
return;
} else {
if (!preserve_flags) {
// encode as MOV and ADD combo. Make sure to use the immediate on the
// ADD since it can encode as an 8-bit sign-extended value.
} else if (!preserve_flags) {
// encode as MOV and ADD combo. Make sure to use the immediate on the
// ADD since it can encode as an 8-bit sign-extended value.
_xMovRtoR(to, src.Index);
xADD(to, src.Displacement);
return;
} else {
// note: no need to do ebp+0 check since we encode all 0 displacements as
// register assignments above (via MOV)
xWrite8(0x8d);
ModRM(displacement_size, to.Id, src.Index.Id);
}
_xMovRtoR(to, sizeMatchedIndex);
xADD(to, src.Displacement);
return;
}
} else {
if (src.Base.IsEmpty()) {
@ -816,49 +893,32 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
xSHL(to, src.Scale);
return;
}
xWrite8(0x8d);
ModRM(0, to.Id, ModRm_UseSib);
SibSB(src.Scale, src.Index.Id, ModRm_UseDisp32);
xWrite32(src.Displacement);
return;
} else {
if (src.Scale == 0) {
if (!preserve_flags) {
if (src.Index == esp) {
// ESP is not encodable as an index (ix86 ignores it), thus:
_xMovRtoR(to, src.Base); // will do the trick!
_xMovRtoR(to, sizeMatchedBase); // will do the trick!
if (src.Displacement)
xADD(to, src.Displacement);
return;
} else if (src.Displacement == 0) {
_xMovRtoR(to, src.Base);
_g1_EmitOp(G1Type_ADD, to, src.Index);
_xMovRtoR(to, sizeMatchedBase);
_g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex);
return;
}
} else if ((src.Index == esp) && (src.Displacement == 0)) {
// special case handling of ESP as Index, which is replaceable with
// a single MOV even when preserve_flags is set! :D
_xMovRtoR(to, src.Base);
_xMovRtoR(to, sizeMatchedBase);
return;
}
}
if (src.Base == ebp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
xWrite8(0x8d);
ModRM(displacement_size, to.Id, ModRm_UseSib);
SibSB(src.Scale, src.Index.Id, src.Base.Id);
}
}
if (displacement_size != 0) {
if (displacement_size == 1)
xWrite<s8>(src.Displacement);
else
xWrite<s32>(src.Displacement);
}
xOpWrite(0, 0x8d, to, src);
}
__emitinline void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags)
@ -888,7 +948,7 @@ void xImpl_Test::operator()(const xRegisterInt &to, const xRegisterInt &from) co
void xImpl_Test::operator()(const xIndirect64orLess &dest, int imm) const
{
xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest);
xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest, dest.GetImmSize());
dest.xWriteImm(imm);
}
@ -918,12 +978,12 @@ void xImpl_IncDec::operator()(const xRegisterInt &to) const
u8 regfield = isDec ? 1 : 0;
xOpWrite(to.GetPrefix16(), 0xfe, regfield, to);
} else {
#ifdef __M_X86_64
pxAssertMsg(0, "Single Byte INC/DEC aren't valid in 64 bits."
"You need to use the ModR/M form (FF/0 FF/1 opcodes)");
#endif
#ifdef __M_X86_64
xOpWrite(to.GetPrefix16(), 0xff, isDec ? 1 : 0, to);
#else
to.prefix16();
xWrite8((isDec ? 0x48 : 0x40) | to.Id);
#endif
}
}
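This is the classic 64-bit encoding gotcha: the one-byte 40+r/48+r INC/DEC opcodes were repurposed as REX prefixes, so x86-64 must use the FF /0 and FF /1 ModR/M forms. The tests below pin down the difference:

    xINC(eaxd); // x86: 40            x86-64: ff c0
    xDEC(rax);  // x86: 48 (dec eax)  x86-64: 48 ff c8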
@ -977,24 +1037,37 @@ const xImpl_DwordShift xSHRD = {0xac};
__emitinline void xPOP(const xIndirectVoid &from)
{
EmitRexImplicitlyWide(from);
xWrite8(0x8f);
EmitSibMagic(0, from);
}
__emitinline void xPUSH(const xIndirectVoid &from)
{
EmitRexImplicitlyWide(from);
xWrite8(0xff);
EmitSibMagic(6, from);
}
__fi void xPOP(xRegister32or64 from) { xWrite8(0x58 | from->Id); }
__fi void xPOP(xRegister32or64 from) {
EmitRexImplicitlyWide(from);
xWrite8(0x58 | (from->Id & 7));
}
__fi void xPUSH(u32 imm)
{
xWrite8(0x68);
xWrite32(imm);
if (is_s8(imm)) {
xWrite8(0x6a);
xWrite8(imm);
} else {
xWrite8(0x68);
xWrite32(imm);
}
}
__fi void xPUSH(xRegister32or64 from) {
EmitRexImplicitlyWide(from);
xWrite8(0x50 | (from->Id & 7));
}
__fi void xPUSH(xRegister32or64 from) { xWrite8(0x50 | from->Id); }
// pushes the EFLAGS register onto the stack
__fi void xPUSHFD() { xWrite8(0x9C); }
@ -1053,17 +1126,18 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest)
//////////////////////////////////////////////////////////////////////////////////////////
// Helper object to handle ABI frame
#ifdef __GNUC__
#ifdef __M_X86_64
// GCC ensures/requires stack to be 16 bytes aligned (but when?)
// All x86-64 calling conventions ensure/require stack to be 16 bytes aligned
// I couldn't find documentation on when, but compilers would indicate it's before the call: https://gcc.godbolt.org/z/KzTfsz
#define ALIGN_STACK(v) xADD(rsp, v)
#else
#elif defined(__GNUC__)
// GCC ensures/requires stack to be 16 bytes aligned before the call
// Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes.
// EBP will take 4 bytes if m_base_frame is enabled
#define ALIGN_STACK(v) xADD(esp, v)
#endif
#else
@ -1077,41 +1151,35 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in
m_save_base_pointer = save_base_pointer;
m_offset = offset;
#ifdef __M_X86_64
m_offset += 8; // Call stores the return address (4 bytes)
m_offset += sizeof(void*); // Call stores the return address (one pointer)
// Note rbp can surely be optimized in 64 bits
if (m_base_frame) {
xPUSH(rbp);
xMOV(rbp, rsp);
m_offset += 8;
m_offset += sizeof(void*);
} else if (m_save_base_pointer) {
xPUSH(rbp);
m_offset += 8;
m_offset += sizeof(void*);
}
#ifdef __M_X86_64
xPUSH(rbx);
xPUSH(r12);
xPUSH(r13);
xPUSH(r14);
xPUSH(r15);
m_offset += 40;
#ifdef _WIN32
xPUSH(rdi);
xPUSH(rsi);
xSUB(rsp, 32); // Windows calling convention specifies additional space for the callee to spill registers
m_offset += 48;
#endif
#else
m_offset += 4; // Call stores the return address (4 bytes)
// Create a new frame
if (m_base_frame) {
xPUSH(ebp);
xMOV(ebp, esp);
m_offset += 4;
} else if (m_save_base_pointer) {
xPUSH(ebp);
m_offset += 4;
}
// Save the register context
xPUSH(edi);
xPUSH(esi);
@ -1130,19 +1198,17 @@ xScopedStackFrame::~xScopedStackFrame()
#ifdef __M_X86_64
// Restore the register context
#ifdef _WIN32
xADD(rsp, 32);
xPOP(rsi);
xPOP(rdi);
#endif
xPOP(r15);
xPOP(r14);
xPOP(r13);
xPOP(r12);
xPOP(rbx);
// Destroy the frame
if (m_base_frame) {
xLEAVE();
} else if (m_save_base_pointer) {
xPOP(rbp);
}
#else
// Restore the register context
@ -1150,14 +1216,14 @@ xScopedStackFrame::~xScopedStackFrame()
xPOP(esi);
xPOP(edi);
#endif
// Destroy the frame
if (m_base_frame) {
xLEAVE();
} else if (m_save_base_pointer) {
xPOP(ebp);
xPOP(rbp);
}
#endif
}
} // End namespace x86Emitter
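A usage sketch of the frame helper (the emitted prologue/epilogue is exactly the push/pop sequence above; the third constructor argument is the extra stack offset to account for):

    {
        xScopedStackFrame frame(/*base_frame=*/true, /*save_base_pointer=*/false, /*offset=*/0);
        // ... emit code that may clobber rbx/r12-r15 (plus rdi/rsi and the 32-byte spill area on Windows) ...
    } // destructor emits the matching epilogue here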


@ -0,0 +1,12 @@
enable_testing()
add_custom_target(unittests)
add_custom_command(TARGET unittests POST_BUILD COMMAND ${CMAKE_CTEST_COMMAND})
macro(add_pcsx2_test target)
add_executable(${target} EXCLUDE_FROM_ALL ${ARGN})
target_link_libraries(${target} PRIVATE x86emitter gtest_main Utilities)
add_dependencies(unittests ${target})
add_test(NAME ${target} COMMAND ${target})
endmacro()
add_subdirectory(x86emitter)


@ -0,0 +1 @@
add_pcsx2_test(x86emitter_test codegen_tests.cpp codegen_tests_main.cpp codegen_tests.h)


@ -0,0 +1,48 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2020 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include <gtest/gtest.h>
#include <x86emitter.h>
using namespace x86Emitter;
thread_local const char *currentTest;
void pxOnAssert(const DiagnosticOrigin &origin, const wxString &msg) {
FAIL() << "Assertion failed: " << msg
<< "\n at " << origin.srcfile << ":" << origin.line << ""
<< "\n when trying to assemble " << currentTest;
}
void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected) {
u8 code[4096];
memset(code, 0xcc, sizeof(code));
char str[4096] = {0};
if (!expected) return;
currentTest = description;
xSetPtr(code);
exec(code);
char *strPtr = str;
for (u8* ptr = code; ptr < xGetPtr(); ptr++) {
sprintf(strPtr, "%02x ", *ptr);
strPtr += 3;
}
if (strPtr != str) {
// Remove final space
*--strPtr = '\0';
}
EXPECT_STRCASEEQ(expected, str) << "Unexpected codegen from " << description;
}


@ -0,0 +1,29 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2020 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected);
// Use null to skip, empty string to expect no output
#ifdef __M_X86_64
# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected64)
# define CODEGEN_TEST_64(command, expected) CODEGEN_TEST(command, nullptr, expected)
# define CODEGEN_TEST_32(command, expected)
#else
# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected32)
# define CODEGEN_TEST_64(command, expected)
# define CODEGEN_TEST_32(command, expected) CODEGEN_TEST(command, expected, nullptr)
#endif
#define CODEGEN_TEST_BOTH(command, expected) CODEGEN_TEST(command, expected, expected)


@ -0,0 +1,161 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2020 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "codegen_tests.h"
#include <gtest/gtest.h>
#include <x86emitter.h>
#include <cstdio>
using namespace x86Emitter;
TEST(CodegenTests, MOVTest)
{
CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8");
CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], 0), "c7 00 00 00 00 00");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], -1), "c7 44 98 03 ff ff ff ff");
CODEGEN_TEST_64(xMOV(rax, 0xffffffff), "b8 ff ff ff ff");
CODEGEN_TEST_64(xMOV(r8, -1), "49 c7 c0 ff ff ff ff");
CODEGEN_TEST_64(xMOV64(rax, 0x1234567890), "48 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_64(xMOV64(r8, 0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_64(xMOV(ptr32[base], 0x12), "c7 05 f6 ff ff ff 12 00 00 00");
}
TEST(CodegenTests, LEATest)
{
CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
}
TEST(CodegenTests, PUSHTest)
{
CODEGEN_TEST_BOTH(xPUSH(rax), "50");
CODEGEN_TEST_64(xPUSH(r8), "41 50");
CODEGEN_TEST_BOTH(xPUSH(0x1234), "68 34 12 00 00");
CODEGEN_TEST_BOTH(xPUSH(0x12), "6a 12");
CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax]), "ff 30");
CODEGEN_TEST_64(xPUSH(ptrNative[r8]), "41 ff 30");
CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*2+3+rbx]), "ff 74 43 03");
CODEGEN_TEST_64(xPUSH(ptrNative[rax*2+3+r8]), "41 ff 74 40 03");
CODEGEN_TEST_64(xPUSH(ptrNative[r9*4+3+r8]), "43 ff 74 88 03");
CODEGEN_TEST_64(xPUSH(ptrNative[r8*4+3+rax]), "42 ff 74 80 03");
CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*8+0x1234+rbx]), "ff b4 c3 34 12 00 00");
CODEGEN_TEST_64(xPUSH(ptrNative[base]), "ff 35 fa ff ff ff");
CODEGEN_TEST(xPUSH(ptrNative[(void*)0x1234]), "ff 35 34 12 00 00", "ff 34 25 34 12 00 00");
}
TEST(CodegenTests, POPTest)
{
CODEGEN_TEST_BOTH(xPOP(rax), "58");
CODEGEN_TEST_64(xPOP(r8), "41 58");
CODEGEN_TEST_BOTH(xPOP(ptrNative[rax]), "8f 00");
CODEGEN_TEST_64(xPOP(ptrNative[r8]), "41 8f 00");
CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*2+3+rbx]), "8f 44 43 03");
CODEGEN_TEST_64(xPOP(ptrNative[rax*2+3+r8]), "41 8f 44 40 03");
CODEGEN_TEST_64(xPOP(ptrNative[r9*4+3+r8]), "43 8f 44 88 03");
CODEGEN_TEST_64(xPOP(ptrNative[r8*4+3+rax]), "42 8f 44 80 03");
CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*8+0x1234+rbx]), "8f 84 c3 34 12 00 00");
CODEGEN_TEST_64(xPOP(ptrNative[base]), "8f 05 fa ff ff ff");
CODEGEN_TEST(xPOP(ptrNative[(void*)0x1234]), "8f 05 34 12 00 00", "8f 04 25 34 12 00 00");
}
TEST(CodegenTests, MathTest)
{
CODEGEN_TEST(xINC(eaxd), "40", "ff c0");
CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
CODEGEN_TEST_64(xINC(r8), "49 ff c0");
CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");
CODEGEN_TEST_64(xADD(r8, 0x12), "49 83 c0 12");
CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8");
CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03");
CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9");
}
TEST(CodegenTests, BitwiseTest)
{
CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9");
CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e");
CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");
}
TEST(CodegenTests, JmpTest)
{
CODEGEN_TEST_64(xJMP(r8), "41 ff e0");
CODEGEN_TEST_BOTH(xJMP(rdi), "ff e7");
CODEGEN_TEST_BOTH(xJMP(ptrNative[rax]), "ff 20");
CODEGEN_TEST_BOTH(xJA(base), "77 fe");
CODEGEN_TEST_BOTH(xJB((char*)base - 0xFFFF), "0f 82 fb ff fe ff");
}
TEST(CodegenTests, SSETest)
{
CODEGEN_TEST_BOTH(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST_64(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST_64(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
CODEGEN_TEST_64(xMOVAPS(ptr128[rax+r9], xmm8), "46 0f 29 04 08");
CODEGEN_TEST_BOTH(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
CODEGEN_TEST_64(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
CODEGEN_TEST_64(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
}