From 4f56db9f1844055967cd3aee8bf64613f2878515 Mon Sep 17 00:00:00 2001 From: tellowkrinkle Date: Wed, 19 Aug 2020 03:19:28 -0500 Subject: [PATCH] Fix codegen on x86-64 (#3512) Fix codegen on x86-64 Part 1 of the changes being worked on in #3451 Makes x86emitter emit the x86-64 machine code you would expect it to Also adds some unit tests to verify that things are working --- .gitmodules | 3 + 3rdparty/gtest | 1 + CMakeLists.txt | 6 + cmake/BuildParameters.cmake | 3 +- cmake/SearchForStuff.cmake | 8 + common/include/Pcsx2Defs.h | 4 - common/include/x86emitter/implement/jmpcall.h | 133 ++------ common/include/x86emitter/implement/movs.h | 18 +- common/include/x86emitter/instructions.h | 3 + common/include/x86emitter/internal.h | 24 +- common/include/x86emitter/x86types.h | 137 +++++--- common/src/x86emitter/groups.cpp | 6 +- common/src/x86emitter/jmp.cpp | 102 +++++- common/src/x86emitter/movs.cpp | 91 ++++-- common/src/x86emitter/x86emitter.cpp | 304 +++++++++++------- tests/ctest/CMakeLists.txt | 12 + tests/ctest/x86emitter/CMakeLists.txt | 1 + tests/ctest/x86emitter/codegen_tests.cpp | 48 +++ tests/ctest/x86emitter/codegen_tests.h | 29 ++ tests/ctest/x86emitter/codegen_tests_main.cpp | 161 ++++++++++ 20 files changed, 784 insertions(+), 310 deletions(-) create mode 160000 3rdparty/gtest create mode 100644 tests/ctest/CMakeLists.txt create mode 100644 tests/ctest/x86emitter/CMakeLists.txt create mode 100644 tests/ctest/x86emitter/codegen_tests.cpp create mode 100644 tests/ctest/x86emitter/codegen_tests.h create mode 100644 tests/ctest/x86emitter/codegen_tests_main.cpp diff --git a/.gitmodules b/.gitmodules index 39a8fed5ca..93248523d1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "3rdparty/xz/xz"] path = 3rdparty/xz/xz url = https://github.com/PCSX2/xz.git +[submodule "3rdparty/gtest"] + path = 3rdparty/gtest + url = https://github.com/google/googletest.git diff --git a/3rdparty/gtest b/3rdparty/gtest new file mode 160000 index 0000000000..703bd9caab --- /dev/null +++ b/3rdparty/gtest @@ -0,0 +1 @@ +Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e diff --git a/CMakeLists.txt b/CMakeLists.txt index bd74628c85..fa21c5161e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,12 @@ if(EXISTS "${CMAKE_SOURCE_DIR}/plugins") add_subdirectory(plugins) endif() +# tests +if(ACTUALLY_ENABLE_TESTS) + add_subdirectory(3rdparty/gtest EXCLUDE_FROM_ALL) + add_subdirectory(tests/ctest) +endif() + #------------------------------------------------------------------------------- # Install some files to ease package creation diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index e1ca1ee922..e59ed0961d 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -21,6 +21,7 @@ # Misc option #------------------------------------------------------------------------------- option(DISABLE_BUILD_DATE "Disable including the binary compile date") +option(ENABLE_TESTS "Enables building the unit tests" ON) if(DISABLE_BUILD_DATE OR openSUSE) message(STATUS "Disabling the inclusion of the binary compile date.") @@ -241,7 +242,7 @@ elseif(${PCSX2_TARGET_ARCHITECTURES} MATCHES "x86_64") set(ARCH_FLAG "-march=native") endif() endif() - add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1) + add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1 -D__M_X86_64=1) set(_ARCH_64 1) set(_M_X86 1) set(_M_X86_64 1) diff --git a/cmake/SearchForStuff.cmake b/cmake/SearchForStuff.cmake index 781cc6d321..e9f5d6ec18 100644 --- a/cmake/SearchForStuff.cmake 
+++ b/cmake/SearchForStuff.cmake @@ -197,6 +197,14 @@ if(HarfBuzz_FOUND) include_directories(${HarfBuzz_INCLUDE_DIRS}) endif() +set(ACTUALLY_ENABLE_TESTS ${ENABLE_TESTS}) +if(ENABLE_TESTS) + if(NOT EXISTS "${CMAKE_SOURCE_DIR}/3rdparty/gtest/CMakeLists.txt") + message(WARNING "ENABLE_TESTS was on but gtest was not found, unit tests will not be enabled") + set(ACTUALLY_ENABLE_TESTS Off) + endif() +endif() + #---------------------------------------- # Use project-wide include directories #---------------------------------------- diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index 02a931a464..e941b0ae20 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -241,7 +241,3 @@ static const int __pagesize = PCSX2_PAGESIZE; #define __fc __fastcall #endif - -#if defined(__x86_64__) || defined(_M_AMD64) -#define __M_X86_64 -#endif diff --git a/common/include/x86emitter/implement/jmpcall.h b/common/include/x86emitter/implement/jmpcall.h index bf69cf43ad..ffc45066ab 100644 --- a/common/include/x86emitter/implement/jmpcall.h +++ b/common/include/x86emitter/implement/jmpcall.h @@ -27,8 +27,12 @@ struct xImpl_JmpCall { bool isJmp; - void operator()(const xRegisterInt &absreg) const; - void operator()(const xIndirect64orLess &src) const; + void operator()(const xAddressReg &absreg) const; + void operator()(const xIndirectNative &src) const; +#ifdef __M_X86_64 + [[deprecated]] // Should move to xIndirectNative + void operator()(const xIndirect32 &absreg) const; +#endif // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. @@ -41,6 +45,7 @@ struct xImpl_JmpCall // always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic). sptr dest = (sptr)func - ((sptr)xGetPtr() + 5); + pxAssertMsg(dest == (s32)dest, "Indirect jump is too far, must use a register!"); xWrite8(0xe8); xWrite32(dest); } @@ -56,131 +61,43 @@ struct xImpl_FastCall // FIXME: current 64 bits is mostly a copy/past potentially it would require to push/pop // some registers. But I think it is enough to handle the first call. 
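For context on the rewritten declarations that follow: on x86-64 the first two integer arguments travel in different registers depending on the ABI (RCX/RDX on Win64, RDI/RSI on System V), which is what the arg1reg/arg2reg constants defined later in this patch capture. A minimal standalone sketch of that selection, with the hypothetical abiArgRegs() helper standing in for the emitter's constants:

#include <cstdio>

// Sketch: ABI-dependent choice of the first two integer argument
// registers, mirroring the arg1reg/arg2reg constants this patch adds.
// Ids follow the emitter's numbering (rax=0, rcx=1, rdx=2, rbx=3,
// rsp=4, rbp=5, rsi=6, rdi=7, r8=8, ...).
struct AbiArgRegs { int arg1, arg2; };

constexpr AbiArgRegs abiArgRegs()
{
#ifdef _WIN32
    return {1, 2}; // Win64: rcx, rdx
#else
    return {7, 6}; // System V (Linux/macOS): rdi, rsi
#endif
}

int main()
{
    const AbiArgRegs r = abiArgRegs();
    std::printf("arg1 reg id = %d, arg2 reg id = %d\n", r.arg1, r.arg2);
}

The swap case handled by prepareRegsForFastcall in jmp.cpp exists for the same reason: if a1 already sits in arg2reg and a2 in arg1reg, naive moves would clobber one argument before it is read.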
+ void operator()(void *f, const xRegister32 &a1 = xEmptyReg, const xRegister32 &a2 = xEmptyReg) const; + + void operator()(void *f, u32 a1, const xRegister32 &a2) const; + void operator()(void *f, const xIndirect32 &a1) const; + void operator()(void *f, u32 a1, u32 a2) const; -// Type unsafety is nice #ifdef __M_X86_64 - -#define XFASTCALL \ - xCALL(f); - -#define XFASTCALL1 \ - xMOV(rdi, a1); \ - xCALL(f); - -#define XFASTCALL2 \ - xMOV(rdi, a1); \ - xMOV(rsi, a2); \ - xCALL(f); - -#else - -#define XFASTCALL \ - xCALL(f); - -#define XFASTCALL1 \ - xMOV(ecx, a1); \ - xCALL(f); - -#define XFASTCALL2 \ - xMOV(ecx, a1); \ - xMOV(edx, a2); \ - xCALL(f); - + void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const; + void operator()(void *f, u32 a1, const xRegisterLong &a2) const; + [[deprecated]] // Switch to xIndirect32, as the size of this isn't obvious #endif + void operator()(void *f, const xIndirectVoid &a1) const; - void operator()(void *f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const + template + __fi void operator()(T *func, u32 a1, const xRegisterLong &a2 = xEmptyReg) const { -#ifdef __M_X86_64 - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#else - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#endif + (*this)((void *)func, a1, a2); } template - __fi void operator()(T *func, u32 a1, const xRegisterLong &a2) const + __fi void operator()(T *func, const xIndirect32 &a1) const { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL2; -#else - XFASTCALL2; -#endif - } - - template - __fi void operator()(T *func, const xIndirectVoid &a1) const - { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL1; -#else - XFASTCALL1; -#endif + (*this)((void*)func, a1); } template __fi void operator()(T *func, u32 a1, u32 a2) const { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL2; -#else - XFASTCALL2; -#endif + (*this)((void*)func, a1, a2); } - template - __fi void operator()(T *func, u32 a1) const - { - void *f = (void *)func; - #ifdef __M_X86_64 - XFASTCALL1; -#else - XFASTCALL1; + [[deprecated]] // Switch to xIndirectNative + void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const; #endif - } - void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const - { -#ifdef __M_X86_64 - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#else - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#endif - } - -#undef XFASTCALL -#undef XFASTCALL1 -#undef XFASTCALL2 + void operator()(const xIndirectNative &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const; }; } // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/movs.h b/common/include/x86emitter/implement/movs.h index d159a0ad90..598afca20b 100644 --- a/common/include/x86emitter/implement/movs.h +++ b/common/include/x86emitter/implement/movs.h @@ -33,8 +33,8 @@ struct xImpl_Mov void operator()(const xRegisterInt &to, const xRegisterInt &from) const; void operator()(const xIndirectVoid &dest, const xRegisterInt &from) const; void operator()(const xRegisterInt &to, const xIndirectVoid &src) const; - void operator()(const xIndirect64orLess &dest, int imm) const; - void 
operator()(const xRegisterInt &to, int imm, bool preserve_flags = false) const; + void operator()(const xIndirect64orLess &dest, sptr imm) const; + void operator()(const xRegisterInt &to, sptr imm, bool preserve_flags = false) const; #if 0 template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg& immOrReg ) const @@ -70,6 +70,20 @@ struct xImpl_Mov #endif }; +#ifdef __M_X86_64 +// -------------------------------------------------------------------------------------- +// xImpl_MovImm64 +// -------------------------------------------------------------------------------------- +// Mov with 64-bit immediates (only available on 64-bit platforms) +// +struct xImpl_MovImm64 +{ + xImpl_MovImm64() {} // Satisfy GCC's whims. + + void operator()(const xRegister64 &to, s64 imm, bool preserve_flags = false) const; +}; +#endif + // -------------------------------------------------------------------------------------- // xImpl_CMov // -------------------------------------------------------------------------------------- diff --git a/common/include/x86emitter/instructions.h b/common/include/x86emitter/instructions.h index 9f9a1ff435..ce78611b4b 100644 --- a/common/include/x86emitter/instructions.h +++ b/common/include/x86emitter/instructions.h @@ -57,6 +57,9 @@ extern const xImpl_G1Compare xCMP; // flags. extern const xImpl_Mov xMOV; +#ifdef __M_X86_64 +extern const xImpl_MovImm64 xMOV64; +#endif extern const xImpl_Test xTEST; extern const xImpl_Group2 xROL, xROR, diff --git a/common/include/x86emitter/internal.h b/common/include/x86emitter/internal.h index aa5682d046..231630ea43 100644 --- a/common/include/x86emitter/internal.h +++ b/common/include/x86emitter/internal.h @@ -25,12 +25,12 @@ namespace x86Emitter #define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from) extern void SimdPrefix(u8 prefix, u16 opcode); -extern void EmitSibMagic(uint regfield, const void *address); -extern void EmitSibMagic(uint regfield, const xIndirectVoid &info); -extern void EmitSibMagic(uint reg1, const xRegisterBase ®2); -extern void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2); -extern void EmitSibMagic(const xRegisterBase ®1, const void *src); -extern void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib); +extern void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset = 0); +extern void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset = 0); +extern void EmitSibMagic(uint reg1, const xRegisterBase ®2, int = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2, int = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const void *src, int extraRIPOffset = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib, int extraRIPOffset = 0); extern void EmitRex(uint regfield, const void *address); extern void EmitRex(uint regfield, const xIndirectVoid &info); @@ -49,7 +49,7 @@ inline void xWrite(T val) } template -__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 ¶m2) +__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 ¶m2, int extraRIPOffset = 0) { if (prefix != 0) xWrite8(prefix); @@ -57,7 +57,7 @@ __emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 &par xWrite8(opcode); - EmitSibMagic(param1, param2); + EmitSibMagic(param1, param2, extraRIPOffset); } template @@ -96,7 +96,13 @@ __emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1 ¶m1, const T2 & template __emitinline void xOpWrite0F(u8 prefix, u16 opcode, 
const T1 ¶m1, const T2 ¶m2, u8 imm8) { - xOpWrite0F(prefix, opcode, param1, param2); + if (prefix != 0) + xWrite8(prefix); + EmitRex(param1, param2); + + SimdPrefix(0, opcode); + + EmitSibMagic(param1, param2, 1); xWrite8(imm8); } diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index 4854c1aacf..155f8a37c8 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -181,6 +181,8 @@ enum SSE2_ComparisonType { static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) +static const int Sib_EIZ = 4; // same index value as ESP (used in Index field) +static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field) extern void xSetPtr(void *ptr); extern void xAlignPtr(uint bytes); @@ -210,9 +212,20 @@ public: xWrite8(0x66); } + int GetImmSize() const { + switch (GetOperandSize()) { + case 1: return 1; + case 2: return 2; + case 4: return 4; + case 8: return 4; // Only mov's take 64-bit immediates + jNO_DEFAULT + } + return 0; + } + void xWriteImm(int imm) const { - switch (GetOperandSize()) { + switch (GetImmSize()) { case 1: xWrite8(imm); break; @@ -222,9 +235,6 @@ public: case 4: xWrite32(imm); break; - case 8: - xWrite64(imm); - break; jNO_DEFAULT } @@ -315,6 +325,9 @@ public: { } + /// Get a non-wide version of the register (for use with e.g. mov, where `mov eax, 3` and `mov rax, 3` are functionally identical but `mov eax, 3` is shorter) + virtual const xRegisterInt& GetNonWide() const = 0; + bool operator==(const xRegisterInt &src) const { return Id == src.Id && (GetOperandSize() == src.GetOperandSize()); } bool operator!=(const xRegisterInt &src) const { return !operator==(src); } }; @@ -336,7 +349,8 @@ public: { } - virtual uint GetOperandSize() const { return 1; } + virtual uint GetOperandSize() const override { return 1; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister8 &src) const { return Id == src.Id; } bool operator!=(const xRegister8 &src) const { return Id != src.Id; } @@ -356,7 +370,8 @@ public: { } - virtual uint GetOperandSize() const { return 2; } + virtual uint GetOperandSize() const override { return 2; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister16 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister16 &src) const { return this->Id != src.Id; } @@ -376,7 +391,8 @@ public: { } - virtual uint GetOperandSize() const { return 4; } + virtual uint GetOperandSize() const override { return 4; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister32 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister32 &src) const { return this->Id != src.Id; } @@ -386,17 +402,21 @@ class xRegister64 : public xRegisterInt { typedef xRegisterInt _parent; + xRegister32 m_nonWide; public: xRegister64() : _parent() + , m_nonWide() { } explicit xRegister64(int regId) : _parent(regId) + , m_nonWide(regId) { } - virtual uint GetOperandSize() const { return 8; } + virtual uint GetOperandSize() const override { return 8; } + virtual const xRegisterInt& GetNonWide() const override { return m_nonWide; } bool operator==(const xRegister64 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister64 &src) const { return this->Id != src.Id; } @@ -498,9 +518,9 @@ public: bool 
IsStackPointer() const { return Id == 4; } xAddressVoid operator+(const xAddressReg &right) const; - xAddressVoid operator+(s32 right) const; + xAddressVoid operator+(sptr right) const; xAddressVoid operator+(const void *right) const; - xAddressVoid operator-(s32 right) const; + xAddressVoid operator-(sptr right) const; xAddressVoid operator-(const void *right) const; xAddressVoid operator*(int factor) const; xAddressVoid operator<<(u32 shift) const; @@ -522,6 +542,11 @@ struct xRegisterEmpty return xRegister16(xRegId_Empty); } + operator xRegister32() const + { + return xRegister32(xRegId_Empty); + } + operator xRegisterSSE() const { return xRegisterSSE(xRegId_Empty); @@ -627,6 +652,13 @@ extern const xAddressReg eax, ebx, ecx, edx, esi, edi, ebp, esp; +// Temporary registers to aid the move to x86-64 +extern const xRegister32 + eaxd, ebxd, ecxd, edxd, + esid, edid, ebpd, espd, + r8d, r9d, r10d, r11d, + r12d, r13d, r14d, r15d; + extern const xRegister16 ax, bx, cx, dx, si, di, bp, sp; @@ -635,6 +667,19 @@ extern const xRegister8 al, dl, bl, ah, ch, dh, bh; +extern const xAddressReg + arg1reg, arg2reg, + arg3reg, arg4reg, + calleeSavedReg1, + calleeSavedReg2; + + +extern const xRegister32 + arg1regd, arg2regd, + calleeSavedReg1d, + calleeSavedReg2d; + + // clang-format on extern const xRegisterCL cl; // I'm special! @@ -661,19 +706,19 @@ public: xAddressReg Base; // base register (no scale) xAddressReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) - s32 Displacement; // address displacement // 4B max even on 64 bits + sptr Displacement; // address displacement // 4B max even on 64 bits but keep rest for assertions public: - xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, s32 displacement = 0); + xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0); - xAddressVoid(const xAddressReg &index, int displacement = 0); + xAddressVoid(const xAddressReg &index, sptr displacement = 0); explicit xAddressVoid(const void *displacement); - explicit xAddressVoid(s32 displacement = 0); + explicit xAddressVoid(sptr displacement = 0); public: bool IsByteSizeDisp() const { return is_s8(Displacement); } - xAddressVoid &Add(s32 imm) + xAddressVoid &Add(sptr imm) { Displacement += imm; return *this; @@ -684,13 +729,13 @@ public: __fi xAddressVoid operator+(const xAddressReg &right) const { return xAddressVoid(*this).Add(right); } __fi xAddressVoid operator+(const xAddressVoid &right) const { return xAddressVoid(*this).Add(right); } - __fi xAddressVoid operator+(s32 imm) const { return xAddressVoid(*this).Add(imm); } - __fi xAddressVoid operator-(s32 imm) const { return xAddressVoid(*this).Add(-imm); } + __fi xAddressVoid operator+(sptr imm) const { return xAddressVoid(*this).Add(imm); } + __fi xAddressVoid operator-(sptr imm) const { return xAddressVoid(*this).Add(-imm); } __fi xAddressVoid operator+(const void *addr) const { return xAddressVoid(*this).Add((uptr)addr); } __fi void operator+=(const xAddressReg &right) { Add(right); } - __fi void operator+=(s32 imm) { Add(imm); } - __fi void operator-=(s32 imm) { Add(-imm); } + __fi void operator+=(sptr imm) { Add(imm); } + __fi void operator-=(sptr imm) { Add(-imm); } }; // -------------------------------------------------------------------------------------- @@ -702,7 +747,7 @@ class xAddressInfo : public xAddressVoid typedef xAddressVoid _parent; public: - xAddressInfo(const xAddressReg &base, 
const xAddressReg &index, int factor = 1, s32 displacement = 0) + xAddressInfo(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0) : _parent(base, index, factor, displacement) { } @@ -710,17 +755,17 @@ public: /*xAddressInfo( const xAddressVoid& src ) : _parent( src ) {}*/ - explicit xAddressInfo(const xAddressReg &index, int displacement = 0) + explicit xAddressInfo(const xAddressReg &index, sptr displacement = 0) : _parent(index, displacement) { } - explicit xAddressInfo(s32 displacement = 0) + explicit xAddressInfo(sptr displacement = 0) : _parent(displacement) { } - static xAddressInfo FromIndexReg(const xAddressReg &index, int scale = 0, s32 displacement = 0); + static xAddressInfo FromIndexReg(const xAddressReg &index, int scale = 0, sptr displacement = 0); public: using _parent::operator+=; @@ -728,7 +773,7 @@ public: bool IsByteSizeDisp() const { return is_s8(Displacement); } - xAddressInfo &Add(s32 imm) + xAddressInfo &Add(sptr imm) { Displacement += imm; return *this; @@ -747,8 +792,8 @@ public: __fi xAddressInfo operator+(const xAddressReg &right) const { return xAddressInfo(*this).Add(right); } __fi xAddressInfo operator+(const xAddressInfo &right) const { return xAddressInfo(*this).Add(right); } - __fi xAddressInfo operator+(s32 imm) const { return xAddressInfo(*this).Add(imm); } - __fi xAddressInfo operator-(s32 imm) const { return xAddressInfo(*this).Add(-imm); } + __fi xAddressInfo operator+(sptr imm) const { return xAddressInfo(*this).Add(imm); } + __fi xAddressInfo operator-(sptr imm) const { return xAddressInfo(*this).Add(-imm); } __fi xAddressInfo operator+(const void *addr) const { return xAddressInfo(*this).Add((uptr)addr); } __fi void operator+=(const xAddressInfo &right) { Add(right); } @@ -765,7 +810,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressVoid &right) return right + addr; } -static __fi xAddressVoid operator+(s32 addr, const xAddressVoid &right) +static __fi xAddressVoid operator+(sptr addr, const xAddressVoid &right) { return right + addr; } @@ -778,7 +823,7 @@ static __fi xAddressInfo operator+(const void *addr, const xAddress } template -static __fi xAddressInfo operator+(s32 addr, const xAddressInfo &right) +static __fi xAddressInfo operator+(sptr addr, const xAddressInfo &right) { return right + addr; } @@ -836,29 +881,31 @@ public: xAddressReg Base; // base register (no scale) xAddressReg Index; // index reg gets multiplied by the scale uint Scale; // scale applied to the index register, in scale/shift form - s32 Displacement; // offset applied to the Base/Index registers. + sptr Displacement; // offset applied to the Base/Index registers. 
// Displacement is 8/32 bits even on x86_64 + // However we need the whole pointer to calculate rip-relative offsets public: - explicit xIndirectVoid(s32 disp); + explicit xIndirectVoid(sptr disp); explicit xIndirectVoid(const xAddressVoid &src); - xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0); + xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0); virtual uint GetOperandSize() const; - xIndirectVoid &Add(s32 imm); + xIndirectVoid &Add(sptr imm); bool IsByteSizeDisp() const { return is_s8(Displacement); } bool IsMem() const { return true; } bool IsReg() const { return false; } bool IsExtended() const { return false; } // Non sense but ease template + bool IsWide() const { return GetOperandSize() == 8; } operator xAddressVoid() { return xAddressVoid(Base, Index, Scale, Displacement); } - __fi xIndirectVoid operator+(const s32 imm) const { return xIndirectVoid(*this).Add(imm); } - __fi xIndirectVoid operator-(const s32 imm) const { return xIndirectVoid(*this).Add(-imm); } + __fi xIndirectVoid operator+(const sptr imm) const { return xIndirectVoid(*this).Add(imm); } + __fi xIndirectVoid operator-(const sptr imm) const { return xIndirectVoid(*this).Add(-imm); } protected: void Reduce(); @@ -870,7 +917,7 @@ class xIndirect : public xIndirectVoid typedef xIndirectVoid _parent; public: - explicit xIndirect(s32 disp) + explicit xIndirect(sptr disp) : _parent(disp) { } @@ -878,21 +925,21 @@ public: : _parent(src) { } - xIndirect(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0) + xIndirect(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0) : _parent(base, index, scale, displacement) { } virtual uint GetOperandSize() const { return sizeof(OperandType); } - xIndirect &Add(s32 imm) + xIndirect &Add(sptr imm) { Displacement += imm; return *this; } - __fi xIndirect operator+(const s32 imm) const { return xIndirect(*this).Add(imm); } - __fi xIndirect operator-(const s32 imm) const { return xIndirect(*this).Add(-imm); } + __fi xIndirect operator+(const sptr imm) const { return xIndirect(*this).Add(imm); } + __fi xIndirect operator-(const sptr imm) const { return xIndirect(*this).Add(-imm); } bool operator==(const xIndirect &src) const { @@ -914,6 +961,11 @@ typedef xIndirect xIndirect64; typedef xIndirect xIndirect32; typedef xIndirect xIndirect16; typedef xIndirect xIndirect8; +#ifdef __M_X86_64 +typedef xIndirect xIndirectNative; +#else +typedef xIndirect xIndirectNative; +#endif // -------------------------------------------------------------------------------------- // xIndirect64orLess - base class 64, 32, 16, and 8 bit operand types @@ -952,11 +1004,11 @@ public: protected: //xIndirect64orLess( const xAddressVoid& src ) : _parent( src ) {} - explicit xIndirect64orLess(s32 disp) + explicit xIndirect64orLess(sptr disp) : _parent(disp) { } - xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0) + xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0) : _parent(base, index, scale, displacement) { } @@ -995,6 +1047,7 @@ public: // ptr[] - use this form for instructions which can resolve the address operand size from // the other register operand sizes. 
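The new ptrNative indexer below pairs with the xIndirectNative typedef above: operands that are always pointer-width (call/jmp targets, push/pop) can be written once and come out 64-bit on x86-64 and 32-bit otherwise. A minimal sketch of the same width selection, with a toy Indirect template standing in for the emitter's xIndirect type:

#include <cstdint>
#include <cstdio>
#include <type_traits>

// Toy stand-in for the emitter's indirect operand type; only the
// operand size matters for this illustration.
template <typename OperandType>
struct Indirect {
    static constexpr unsigned operandSize = sizeof(OperandType);
};

// Pointer-width operand type for the build target, analogous to the
// xIndirectNative typedef (u64 under __M_X86_64, u32 otherwise).
using IndirectNative =
    std::conditional_t<sizeof(void *) == 8,
                       Indirect<std::uint64_t>, Indirect<std::uint32_t>>;

int main()
{
    std::printf("native operand size: %u bytes\n", IndirectNative::operandSize);
}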
extern const xAddressIndexer ptr; +extern const xAddressIndexer ptrNative; extern const xAddressIndexer ptr128; extern const xAddressIndexer ptr64; extern const xAddressIndexer ptr32; @@ -1165,7 +1218,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressReg ®) return reg + (sptr)addr; } -static __fi xAddressVoid operator+(s32 addr, const xAddressReg ®) +static __fi xAddressVoid operator+(sptr addr, const xAddressReg ®) { return reg + (sptr)addr; } diff --git a/common/src/x86emitter/groups.cpp b/common/src/x86emitter/groups.cpp index 350901a68c..07b783e416 100644 --- a/common/src/x86emitter/groups.cpp +++ b/common/src/x86emitter/groups.cpp @@ -50,7 +50,7 @@ static void _g1_IndirectImm(G1Type InstType, const xIndirect64orLess &sibdest, i xWrite(imm); } else { u8 opcode = is_s8(imm) ? 0x83 : 0x81; - xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest); + xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest, is_s8(imm) ? 1 : sibdest.GetImmSize()); if (is_s8(imm)) xWrite(imm); @@ -156,7 +156,7 @@ void xImpl_Group2::operator()(const xIndirect64orLess &sibdest, u8 imm) const // special encoding of 1's xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xd0 : 0xd1, InstType, sibdest); } else { - xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest); + xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest, 1); xWrite8(imm); } } @@ -195,7 +195,7 @@ static void _imul_ImmStyle(const xRegisterInt ¶m1, const SrcType ¶m2, in { pxAssert(param1.GetOperandSize() == param2.GetOperandSize()); - xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2); + xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2, is_s8(imm) ? 1 : param1.GetImmSize()); if (is_s8(imm)) xWrite8((u8)imm); diff --git a/common/src/x86emitter/jmp.cpp b/common/src/x86emitter/jmp.cpp index a9b878d4a7..3efbff5418 100644 --- a/common/src/x86emitter/jmp.cpp +++ b/common/src/x86emitter/jmp.cpp @@ -34,12 +34,110 @@ namespace x86Emitter { -void xImpl_JmpCall::operator()(const xRegisterInt &absreg) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg); } -void xImpl_JmpCall::operator()(const xIndirect64orLess &src) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, src); } +void xImpl_JmpCall::operator()(const xAddressReg &absreg) const { + // Jumps are always wide and don't need the rex.W + xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg.GetNonWide()); +} +void xImpl_JmpCall::operator()(const xIndirectNative &src) const { + // Jumps are always wide and don't need the rex.W + EmitRex(0, xIndirect32(src.Base, src.Index, 1, 0)); + xWrite8(0xff); + EmitSibMagic(isJmp ? 4 : 2, src); +} +#ifdef __M_X86_64 +void xImpl_JmpCall::operator()(const xIndirect32 &absreg) const { + xOpWrite(0, 0xff, isJmp ? 
4 : 2, absreg); +} +#endif const xImpl_JmpCall xJMP = {true}; const xImpl_JmpCall xCALL = {false}; + +template +void prepareRegsForFastcall(const Reg1 &a1, const Reg2 &a2) { + if (a1.IsEmpty()) return; + + // Make sure we don't mess up if someone tries to fastcall with a1 in arg2reg and a2 in arg1reg + if (a2.Id != arg1reg.Id) { + xMOV(Reg1(arg1reg.Id), a1); + if (!a2.IsEmpty()) { + xMOV(Reg2(arg2reg.Id), a2); + } + } else if (a1.Id != arg2reg.Id) { + xMOV(Reg2(arg2reg.Id), a2); + xMOV(Reg1(arg1reg.Id), a1); + } else { + xPUSH(a1); + xMOV(Reg2(arg2reg.Id), a2); + xPOP(Reg1(arg1reg.Id)); + } +} + +void xImpl_FastCall::operator()(void *f, const xRegister32 &a1, const xRegister32 &a2) const { + prepareRegsForFastcall(a1, a2); + uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f; + if ((sptr)disp == (s32)disp) { + xCALL(f); + } else { + xMOV(rax, ptrNative[f]); + xCALL(rax); + } +} + +#ifdef __M_X86_64 +void xImpl_FastCall::operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f; + if ((sptr)disp == (s32)disp) { + xCALL(f); + } else { + xMOV(rax, ptrNative[f]); + xCALL(rax); + } +} + +void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const { + if (!a2.IsEmpty()) { xMOV(arg2reg, a2); } + xMOV(arg1reg, a1); + (*this)(f, arg1reg, arg2reg); +} +#endif + +void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const { + if (!a2.IsEmpty()) { xMOV(arg2regd, a2); } + xMOV(arg1regd, a1); + (*this)(f, arg1regd, arg2regd); +} + +void xImpl_FastCall::operator()(void *f, const xIndirect32 &a1) const { + xMOV(arg1regd, a1); + (*this)(f, arg1regd); +} + +void xImpl_FastCall::operator()(void *f, const xIndirectVoid &a1) const { + xMOV(arg1regd, a1); + (*this)(f, arg1regd); +} + +void xImpl_FastCall::operator()(void *f, u32 a1, u32 a2) const { + xMOV(arg1regd, a1); + xMOV(arg2regd, a2); + (*this)(f, arg1regd, arg2regd); +} + +#ifdef __M_X86_64 +void xImpl_FastCall::operator()(const xIndirect32 &f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + xCALL(f); +} +#endif + +void xImpl_FastCall::operator()(const xIndirectNative &f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + xCALL(f); +} + const xImpl_FastCall xFastCall = {}; void xSmartJump::SetTarget() diff --git a/common/src/x86emitter/movs.cpp b/common/src/x86emitter/movs.cpp index 231ef879a4..5ff4b36214 100644 --- a/common/src/x86emitter/movs.cpp +++ b/common/src/x86emitter/movs.cpp @@ -56,15 +56,15 @@ void xImpl_Mov::operator()(const xIndirectVoid &dest, const xRegisterInt &from) // mov eax has a special from when writing directly to a DISP32 address // (sans any register index/base registers). +#ifndef __M_X86_64 + // Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr if (from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty()) { -// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally -#ifdef __M_X86_64 - pxAssert(0); -#endif - xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from.Id, dest); + xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from, dest); xWrite32(dest.Displacement); - } else { - xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from.Id, dest); + } else +#endif + { + xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 
0x88 : 0x89, from, dest); } } @@ -73,40 +73,91 @@ void xImpl_Mov::operator()(const xRegisterInt &to, const xIndirectVoid &src) con // mov eax has a special from when reading directly from a DISP32 address // (sans any register index/base registers). +#ifndef __M_X86_64 + // Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr if (to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty()) { -// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally -#ifdef __M_X86_64 - pxAssert(0); -#endif xOpAccWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xa0 : 0xa1, to, src); xWrite32(src.Displacement); - } else { + } else +#endif + { xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0x8a : 0x8b, to, src); } } -void xImpl_Mov::operator()(const xIndirect64orLess &dest, int imm) const +void xImpl_Mov::operator()(const xIndirect64orLess &dest, sptr imm) const { - xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest); + switch (dest.GetOperandSize()) { + case 1: + pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!"); + break; + case 2: + pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!"); + break; + case 4: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!"); + break; + case 8: + pxAssertMsg(imm == (s32)imm, "Immediate won't fit in immediate slot, go through a register!"); + break; + default: + pxAssertMsg(0, "Bad indirect size!"); + } + xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest, dest.GetImmSize()); dest.xWriteImm(imm); } // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). -void xImpl_Mov::operator()(const xRegisterInt &to, int imm, bool preserve_flags) const +void xImpl_Mov::operator()(const xRegisterInt &to, sptr imm, bool preserve_flags) const { - if (!preserve_flags && (imm == 0)) - _g1_EmitOp(G1Type_XOR, to, to); - else { + switch (to.GetOperandSize()) { + case 1: + pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!"); + break; + case 2: + pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!"); + break; + case 4: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!"); + break; + case 8: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit in immediate slot, use mov64 or lea!"); + break; + default: + pxAssertMsg(0, "Bad indirect size!"); + } + const xRegisterInt& to_ = to.GetNonWide(); + if (!preserve_flags && (imm == 0)) { + _g1_EmitOp(G1Type_XOR, to_, to_); + } else if (imm == (u32)imm || !to.IsWide()) { // Note: MOV does not have (reg16/32,imm8) forms. - u8 opcode = (to.Is8BitOp() ? 0xb0 : 0xb8) | to.Id; - xOpAccWrite(to.GetPrefix16(), opcode, 0, to); + u8 opcode = (to_.Is8BitOp() ? 
0xb0 : 0xb8) | to_.Id; + xOpAccWrite(to_.GetPrefix16(), opcode, 0, to_); + to_.xWriteImm(imm); + } else { + xOpWrite(to.GetPrefix16(), 0xc7, 0, to); to.xWriteImm(imm); } } const xImpl_Mov xMOV; +#ifdef __M_X86_64 +void xImpl_MovImm64::operator()(const xRegister64& to, s64 imm, bool preserve_flags) const +{ + if (imm == (u32)imm || imm == (s32)imm) { + xMOV(to, imm, preserve_flags); + } else { + u8 opcode = 0xb8 | to.Id; + xOpAccWrite(to.GetPrefix16(), opcode, 0, to); + xWrite64(imm); + } +} + +const xImpl_MovImm64 xMOV64; +#endif + // -------------------------------------------------------------------------------------- // CMOVcc // -------------------------------------------------------------------------------------- diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index 2942ab335b..8b6c3e5158 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -97,6 +97,7 @@ __fi void xWrite64(u64 val) // objects be initialized even though they have no actual variable members). const xAddressIndexer ptr = {}; +const xAddressIndexer ptrNative = {}; const xAddressIndexer ptr128 = {}; const xAddressIndexer ptr64 = {}; const xAddressIndexer ptr32 = {}; @@ -135,6 +136,16 @@ const xAddressReg esp(4), ebp(5), esi(6), edi(7); +const xRegister32 + eaxd(0), ebxd(3), + ecxd(1), edxd(2), + espd(4), ebpd(5), + esid(6), edid(7), + r8d(8), r9d(9), + r10d(10), r11d(11), + r12d(12), r13d(13), + r14d(14), r15d(15); + const xRegister16 ax(0), bx(3), cx(1), dx(2), @@ -147,6 +158,41 @@ const xRegister8 ah(4), ch(5), dh(6), bh(7); +#if defined(_WIN32) || !defined(__M_X86_64) +const xAddressReg + arg1reg = rcx, + arg2reg = rdx, +#ifdef __M_X86_64 + arg3reg = r8, + arg4reg = r9, +#else + arg3reg = xRegisterEmpty(), + arg4reg = xRegisterEmpty(), +#endif + calleeSavedReg1 = rdi, + calleeSavedReg2 = rsi; + +const xRegister32 + arg1regd = ecxd, + arg2regd = edxd, + calleeSavedReg1d = edid, + calleeSavedReg2d = esid; +#else +const xAddressReg + arg1reg = rdi, + arg2reg = rsi, + arg3reg = rdx, + arg4reg = rcx, + calleeSavedReg1 = r12, + calleeSavedReg2 = r13; + +const xRegister32 + arg1regd = edid, + arg2regd = esid, + calleeSavedReg1d = r12d, + calleeSavedReg2d = r13d; +#endif + // clang-format on const xRegisterCL cl; @@ -250,16 +296,22 @@ static __fi void SibSB(u32 ss, u32 index, u32 base) xWrite8((ss << 6) | (index << 3) | base); } -void EmitSibMagic(uint regfield, const void *address) +void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset) { - ModRM(0, regfield, ModRm_UseDisp32); - - // SIB encoding only supports 32bit offsets, even on x86_64 - // We must make sure that the displacement is within the 32bit range - // Else we will fail out in a spectacular fashion sptr displacement = (sptr)address; -#ifdef __M_X86_64 - pxAssertDev(displacement >= -0x80000000LL && displacement < 0x80000000LL, "SIB target is too far away, needs an indirect register"); +#ifndef __M_X86_64 + ModRM(0, regfield, ModRm_UseDisp32); +#else + sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset); + // Can we use a rip-relative address? 
(Prefer this over eiz because it's a byte shorter) + if (ripRelative == (s32)ripRelative) { + ModRM(0, regfield, ModRm_UseDisp32); + displacement = ripRelative; + } else { + pxAssertDev(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register"); + ModRM(0, regfield, ModRm_UseSib); + SibSB(0, Sib_EIZ, Sib_UseDisp32); + } #endif xWrite((s32)displacement); @@ -293,7 +345,7 @@ static __fi bool NeedsSibMagic(const xIndirectVoid &info) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // -void EmitSibMagic(uint regfield, const xIndirectVoid &info) +void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset) { // 3 bits also on x86_64 (so max is 8) // We might need to mask it on x86_64 @@ -302,6 +354,8 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) ((info.IsByteSizeDisp()) ? 1 : 2); pxAssert(!info.Base.IsEmpty() || !info.Index.IsEmpty() || displacement_size == 2); + // Displacement is only 64 bits for rip-relative addressing + pxAssert(info.Displacement == (s32)info.Displacement || (info.Base.IsEmpty() && info.Index.IsEmpty())); if (!NeedsSibMagic(info)) { // Use ModRm-only encoding, with the rm field holding an index/base register, if @@ -310,13 +364,13 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) // encoded *with* a displacement of 0, if it would otherwise not have one). if (info.Index.IsEmpty()) { - EmitSibMagic(regfield, (void *)info.Displacement); + EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset); return; } else { if (info.Index == ebp && displacement_size == 0) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - ModRM(displacement_size, regfield, info.Index.Id); + ModRM(displacement_size, regfield, info.Index.Id & 7); } } else { // In order to encode "just" index*scale (and no base), we have to encode @@ -327,7 +381,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) if (info.Base.IsEmpty()) { ModRM(0, regfield, ModRm_UseSib); - SibSB(info.Scale, info.Index.Id, ModRm_UseDisp32); + SibSB(info.Scale, info.Index.Id, Sib_UseDisp32); xWrite(info.Displacement); return; } else { @@ -335,7 +389,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! ModRM(displacement_size, regfield, ModRm_UseSib); - SibSB(info.Scale, info.Index.Id, info.Base.Id); + SibSB(info.Scale, info.Index.Id & 7, info.Base.Id & 7); } } @@ -349,24 +403,24 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) // Writes a ModRM byte for "Direct" register access forms, which is used for all // instructions taking a form of [reg,reg]. 
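The `Id & 7` masks introduced in the hunks below follow from the instruction format: ModRM and SIB register fields are only 3 bits wide, so for r8-r15 the fourth register bit has to travel in the REX prefix instead (REX.R extends ModRM.reg, REX.X the SIB index, REX.B the rm/base field). A standalone sketch that reproduces one of the new unit tests, xMOV(r8, rax) => 49 89 c0, under these rules:

#include <cstdint>
#include <cstdio>

// Sketch: REX + ModRM for a 64-bit reg-to-reg MOV (opcode 0x89,
// "mov rm64, r64"). Ids 8-15 lift their top bit into REX while the
// ModRM fields keep id & 7, exactly the masking added in this patch.
static void encodeMovRegReg(std::uint8_t out[3], int srcId, int dstId)
{
    const int w = 1;                // REX.W: 64-bit operand size
    const int r = (srcId >> 3) & 1; // REX.R extends ModRM.reg (source)
    const int x = 0;                // REX.X: no SIB index in this form
    const int b = (dstId >> 3) & 1; // REX.B extends ModRM.rm (destination)
    out[0] = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
    out[1] = 0x89;                                        // mov rm, reg
    out[2] = (3 << 6) | ((srcId & 7) << 3) | (dstId & 7); // Mod_Direct
}

int main()
{
    std::uint8_t buf[3];
    encodeMovRegReg(buf, /*rax*/ 0, /*r8*/ 8); // mov r8, rax
    std::printf("%02x %02x %02x\n", (unsigned)buf[0], (unsigned)buf[1],
                (unsigned)buf[2]); // expected: 49 89 c0
}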
-void EmitSibMagic(uint reg1, const xRegisterBase ®2) +void EmitSibMagic(uint reg1, const xRegisterBase ®2, int) { - xWrite8((Mod_Direct << 6) | (reg1 << 3) | reg2.Id); + xWrite8((Mod_Direct << 6) | (reg1 << 3) | (reg2.Id & 7)); } -void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2) +void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2, int) { - xWrite8((Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id); + xWrite8((Mod_Direct << 6) | ((reg1.Id & 7) << 3) | (reg2.Id & 7)); } -void EmitSibMagic(const xRegisterBase ®1, const void *src) +void EmitSibMagic(const xRegisterBase ®1, const void *src, int extraRIPOffset) { - EmitSibMagic(reg1.Id, src); + EmitSibMagic(reg1.Id & 7, src, extraRIPOffset); } -void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib) +void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib, int extraRIPOffset) { - EmitSibMagic(reg1.Id, sib); + EmitSibMagic(reg1.Id & 7, sib, extraRIPOffset); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -391,10 +445,14 @@ void EmitRex(uint regfield, const void *address) void EmitRex(uint regfield, const xIndirectVoid &info) { - bool w = info.Base.IsWide(); + bool w = info.IsWide(); bool r = false; - bool x = false; - bool b = info.IsExtended(); + bool x = info.Index.IsExtended(); + bool b = info.Base.IsExtended(); + if (!NeedsSibMagic(info)) { + b = x; + x = false; + } EmitRex(w, r, x, b); } @@ -432,6 +490,33 @@ void EmitRex(const xRegisterBase ®1, const xIndirectVoid &sib) bool r = reg1.IsExtended(); bool x = sib.Index.IsExtended(); bool b = sib.Base.IsExtended(); + if (!NeedsSibMagic(sib)) { + b = x; + x = false; + } + EmitRex(w, r, x, b); +} + +// For use by instructions that are implicitly wide +void EmitRexImplicitlyWide(const xRegisterBase ®) +{ + bool w = false; + bool r = false; + bool x = false; + bool b = reg.IsExtended(); + EmitRex(w, r, x, b); +} + +void EmitRexImplicitlyWide(const xIndirectVoid &sib) +{ + bool w = false; + bool r = false; + bool x = sib.Index.IsExtended(); + bool b = sib.Base.IsExtended(); + if (!NeedsSibMagic(sib)) { + b = x; + x = false; + } EmitRex(w, r, x, b); } @@ -459,7 +544,7 @@ __emitinline u8 *xGetPtr() __emitinline void xAlignPtr(uint bytes) { // forward align - x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(bytes - 1)); + x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(uptr)(bytes - 1)); } // Performs best-case alignment for the target CPU, for use prior to starting a new @@ -506,7 +591,7 @@ xAddressVoid xAddressReg::operator+(const xAddressReg &right) const return xAddressVoid(*this, right); } -xAddressVoid xAddressReg::operator+(s32 right) const +xAddressVoid xAddressReg::operator+(sptr right) const { pxAssertMsg(Id != -1, "Uninitialized x86 register."); return xAddressVoid(*this, right); @@ -518,7 +603,7 @@ xAddressVoid xAddressReg::operator+(const void *right) const return xAddressVoid(*this, (sptr)right); } -xAddressVoid xAddressReg::operator-(s32 right) const +xAddressVoid xAddressReg::operator-(sptr right) const { pxAssertMsg(Id != -1, "Uninitialized x86 register."); return xAddressVoid(*this, -right); @@ -547,7 +632,7 @@ xAddressVoid xAddressReg::operator<<(u32 shift) const // xAddressVoid (method implementations) // -------------------------------------------------------------------------------------- -xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, s32 displacement) +xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, 
sptr displacement) { Base = base; Index = index; @@ -558,7 +643,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, in pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register."); } -xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement) +xAddressVoid::xAddressVoid(const xAddressReg &index, sptr displacement) { Base = xEmptyReg; Index = index; @@ -568,7 +653,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement) pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register."); } -xAddressVoid::xAddressVoid(s32 displacement) +xAddressVoid::xAddressVoid(sptr displacement) { Base = xEmptyReg; Index = xEmptyReg; @@ -581,12 +666,7 @@ xAddressVoid::xAddressVoid(const void *displacement) Base = xEmptyReg; Index = xEmptyReg; Factor = 0; -#ifdef __M_X86_64 - pxAssert(0); -//Displacement = (s32)displacement; -#else - Displacement = (s32)displacement; -#endif + Displacement = (sptr)displacement; } xAddressVoid &xAddressVoid::Add(const xAddressReg &src) @@ -643,7 +723,7 @@ xIndirectVoid::xIndirectVoid(const xAddressVoid &src) Reduce(); } -xIndirectVoid::xIndirectVoid(s32 disp) +xIndirectVoid::xIndirectVoid(sptr disp) { Base = xEmptyReg; Index = xEmptyReg; @@ -653,7 +733,7 @@ xIndirectVoid::xIndirectVoid(s32 disp) // no reduction necessary :D } -xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, s32 displacement) +xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, sptr displacement) { Base = base; Index = index; @@ -754,7 +834,7 @@ uint xIndirectVoid::GetOperandSize() const return 0; } -xIndirectVoid &xIndirectVoid::Add(s32 imm) +xIndirectVoid &xIndirectVoid::Add(sptr imm) { Displacement += imm; return *this; @@ -775,7 +855,11 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool // See EmitSibMagic for commenting on SIB encoding. - if (!NeedsSibMagic(src)) { + // We should allow native-sized addressing regs (e.g. lea eax, [rax]) + const xRegisterInt& sizeMatchedIndex = to.IsWide() ? src.Index : src.Index.GetNonWide(); + const xRegisterInt& sizeMatchedBase = to.IsWide() ? src.Base : src.Base.GetNonWide(); + + if (!NeedsSibMagic(src) && src.Displacement == (s32)src.Displacement) { // LEA Land: means we have either 1-register encoding or just an offset. // offset is encodable as an immediate MOV, and a register is encodable // as a register MOV. @@ -783,24 +867,17 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool if (src.Index.IsEmpty()) { xMOV(to, src.Displacement); return; - } else if (displacement_size == 0) { - _xMovRtoR(to, src.Index); + } + else if (displacement_size == 0) { + _xMovRtoR(to, sizeMatchedIndex); return; - } else { - if (!preserve_flags) { - // encode as MOV and ADD combo. Make sure to use the immediate on the - // ADD since it can encode as an 8-bit sign-extended value. + } else if (!preserve_flags) { + // encode as MOV and ADD combo. Make sure to use the immediate on the + // ADD since it can encode as an 8-bit sign-extended value. 
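That MOV/ADD substitution is only legal because this branch has already checked !preserve_flags: ADD writes EFLAGS while a real LEA does not, which is why the preserve_flags case falls through to the plain 0x8d encoding at the bottom of the function. A condensed model of the one-register decision in this hunk, with the hypothetical lowerLea() helper standing in for EmitLeaMagic:

#include <cstdio>

// Sketch of the one-register LEA lowering in EmitLeaMagic:
// lea to, [index + disp] has MOV/ADD equivalents, but ADD clobbers
// EFLAGS, so the substitution is gated on preserve_flags.
static const char *lowerLea(long disp, bool preserveFlags)
{
    if (disp == 0)
        return "mov to, index";               // plain copy, flags untouched
    if (!preserveFlags)
        return "mov to, index; add to, disp"; // ADD can take a sign-extended imm8
    return "lea to, [index + disp]";          // real LEA, leaves flags alone
}

int main()
{
    std::printf("%s\n", lowerLea(0, true));   // mov to, index
    std::printf("%s\n", lowerLea(16, false)); // mov to, index; add to, disp
    std::printf("%s\n", lowerLea(16, true));  // lea to, [index + disp]
}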
- _xMovRtoR(to, src.Index); - xADD(to, src.Displacement); - return; - } else { - // note: no need to do ebp+0 check since we encode all 0 displacements as - // register assignments above (via MOV) - - xWrite8(0x8d); - ModRM(displacement_size, to.Id, src.Index.Id); - } + _xMovRtoR(to, sizeMatchedIndex); + xADD(to, src.Displacement); + return; } } else { if (src.Base.IsEmpty()) { @@ -816,49 +893,32 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool xSHL(to, src.Scale); return; } - xWrite8(0x8d); - ModRM(0, to.Id, ModRm_UseSib); - SibSB(src.Scale, src.Index.Id, ModRm_UseDisp32); - xWrite32(src.Displacement); - return; } else { if (src.Scale == 0) { if (!preserve_flags) { if (src.Index == esp) { // ESP is not encodable as an index (ix86 ignores it), thus: - _xMovRtoR(to, src.Base); // will do the trick! + _xMovRtoR(to, sizeMatchedBase); // will do the trick! if (src.Displacement) xADD(to, src.Displacement); return; } else if (src.Displacement == 0) { - _xMovRtoR(to, src.Base); - _g1_EmitOp(G1Type_ADD, to, src.Index); + _xMovRtoR(to, sizeMatchedBase); + _g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex); return; } } else if ((src.Index == esp) && (src.Displacement == 0)) { // special case handling of ESP as Index, which is replaceable with // a single MOV even when preserve_flags is set! :D - _xMovRtoR(to, src.Base); + _xMovRtoR(to, sizeMatchedBase); return; } } - - if (src.Base == ebp && displacement_size == 0) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - xWrite8(0x8d); - ModRM(displacement_size, to.Id, ModRm_UseSib); - SibSB(src.Scale, src.Index.Id, src.Base.Id); } } - if (displacement_size != 0) { - if (displacement_size == 1) - xWrite(src.Displacement); - else - xWrite(src.Displacement); - } + xOpWrite(0, 0x8d, to, src); } __emitinline void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags) @@ -888,7 +948,7 @@ void xImpl_Test::operator()(const xRegisterInt &to, const xRegisterInt &from) co void xImpl_Test::operator()(const xIndirect64orLess &dest, int imm) const { - xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest); + xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest, dest.GetImmSize()); dest.xWriteImm(imm); } @@ -918,12 +978,12 @@ void xImpl_IncDec::operator()(const xRegisterInt &to) const u8 regfield = isDec ? 1 : 0; xOpWrite(to.GetPrefix16(), 0xfe, regfield, to); } else { -#ifdef __M_X86_64 - pxAssertMsg(0, "Single Byte INC/DEC aren't valid in 64 bits." - "You need to use the ModR/M form (FF/0 FF/1 opcodes)"); -#endif + #ifdef __M_X86_64 + xOpWrite(to.GetPrefix16(), 0xff, isDec ? 1 : 0, to); + #else to.prefix16(); xWrite8((isDec ? 
0x48 : 0x40) | to.Id); + #endif } } @@ -977,24 +1037,37 @@ const xImpl_DwordShift xSHRD = {0xac}; __emitinline void xPOP(const xIndirectVoid &from) { + EmitRexImplicitlyWide(from); xWrite8(0x8f); EmitSibMagic(0, from); } __emitinline void xPUSH(const xIndirectVoid &from) { + EmitRexImplicitlyWide(from); xWrite8(0xff); EmitSibMagic(6, from); } -__fi void xPOP(xRegister32or64 from) { xWrite8(0x58 | from->Id); } +__fi void xPOP(xRegister32or64 from) { + EmitRexImplicitlyWide(from); + xWrite8(0x58 | (from->Id & 7)); +} __fi void xPUSH(u32 imm) { - xWrite8(0x68); - xWrite32(imm); + if (is_s8(imm)) { + xWrite8(0x6a); + xWrite8(imm); + } else { + xWrite8(0x68); + xWrite32(imm); + } +} +__fi void xPUSH(xRegister32or64 from) { + EmitRexImplicitlyWide(from); + xWrite8(0x50 | (from->Id & 7)); } -__fi void xPUSH(xRegister32or64 from) { xWrite8(0x50 | from->Id); } // pushes the EFLAGS register onto the stack __fi void xPUSHFD() { xWrite8(0x9C); } @@ -1053,17 +1126,18 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest) ////////////////////////////////////////////////////////////////////////////////////////// // Helper object to handle ABI frame -#ifdef __GNUC__ - #ifdef __M_X86_64 -// GCC ensures/requires stack to be 16 bytes aligned (but when?) + +// All x86-64 calling conventions ensure/require stack to be 16 bytes aligned +// I couldn't find documentation on when, but compilers would indicate it's before the call: https://gcc.godbolt.org/z/KzTfsz #define ALIGN_STACK(v) xADD(rsp, v) -#else + +#elif defined(__GNUC__) + // GCC ensures/requires stack to be 16 bytes aligned before the call // Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes. // EBP will take 4 bytes if m_base_frame is enabled #define ALIGN_STACK(v) xADD(esp, v) -#endif #else @@ -1077,41 +1151,35 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in m_save_base_pointer = save_base_pointer; m_offset = offset; -#ifdef __M_X86_64 - - m_offset += 8; // Call stores the return address (4 bytes) + m_offset += sizeof(void*); // Call stores the return address (4 bytes) // Note rbp can surely be optimized in 64 bits if (m_base_frame) { xPUSH(rbp); xMOV(rbp, rsp); - m_offset += 8; + m_offset += sizeof(void*); } else if (m_save_base_pointer) { xPUSH(rbp); - m_offset += 8; + m_offset += sizeof(void*); } +#ifdef __M_X86_64 + xPUSH(rbx); xPUSH(r12); xPUSH(r13); xPUSH(r14); xPUSH(r15); m_offset += 40; +#ifdef _WIN32 + xPUSH(rdi); + xPUSH(rsi); + xSUB(rsp, 32); // Windows calling convention specifies additional space for the callee to spill registers + m_offset += 48; +#endif #else - m_offset += 4; // Call stores the return address (4 bytes) - - // Create a new frame - if (m_base_frame) { - xPUSH(ebp); - xMOV(ebp, esp); - m_offset += 4; - } else if (m_save_base_pointer) { - xPUSH(ebp); - m_offset += 4; - } - // Save the register context xPUSH(edi); xPUSH(esi); @@ -1130,19 +1198,17 @@ xScopedStackFrame::~xScopedStackFrame() #ifdef __M_X86_64 // Restore the register context +#ifdef _WIN32 + xADD(rsp, 32); + xPOP(rsi); + xPOP(rdi); +#endif xPOP(r15); xPOP(r14); xPOP(r13); xPOP(r12); xPOP(rbx); - // Destroy the frame - if (m_base_frame) { - xLEAVE(); - } else if (m_save_base_pointer) { - xPOP(rbp); - } - #else // Restore the register context @@ -1150,14 +1216,14 @@ xScopedStackFrame::~xScopedStackFrame() xPOP(esi); xPOP(edi); +#endif + // Destroy the frame if (m_base_frame) { xLEAVE(); } else if (m_save_base_pointer) { - xPOP(ebp); + xPOP(rbp); } - -#endif } } // End namespace x86Emitter diff --git 
a/tests/ctest/CMakeLists.txt b/tests/ctest/CMakeLists.txt new file mode 100644 index 0000000000..6d0e19b1ca --- /dev/null +++ b/tests/ctest/CMakeLists.txt @@ -0,0 +1,12 @@ +enable_testing() +add_custom_target(unittests) +add_custom_command(TARGET unittests POST_BUILD COMMAND ${CMAKE_CTEST_COMMAND}) + +macro(add_pcsx2_test target) + add_executable(${target} EXCLUDE_FROM_ALL ${ARGN}) + target_link_libraries(${target} PRIVATE x86emitter gtest_main Utilities) + add_dependencies(unittests ${target}) + add_test(NAME ${target} COMMAND ${target}) +endmacro() + +add_subdirectory(x86emitter) diff --git a/tests/ctest/x86emitter/CMakeLists.txt b/tests/ctest/x86emitter/CMakeLists.txt new file mode 100644 index 0000000000..b3fa050bb6 --- /dev/null +++ b/tests/ctest/x86emitter/CMakeLists.txt @@ -0,0 +1 @@ +add_pcsx2_test(x86emitter_test codegen_tests.cpp codegen_tests_main.cpp codegen_tests.h) diff --git a/tests/ctest/x86emitter/codegen_tests.cpp b/tests/ctest/x86emitter/codegen_tests.cpp new file mode 100644 index 0000000000..607ed40496 --- /dev/null +++ b/tests/ctest/x86emitter/codegen_tests.cpp @@ -0,0 +1,48 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2020 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include +#include + +using namespace x86Emitter; + +thread_local const char *currentTest; + +void pxOnAssert(const DiagnosticOrigin &origin, const wxString &msg) { + FAIL() << "Assertion failed: " << msg + << "\n at " << origin.srcfile << ":" << origin.line << "" + << "\n when trying to assemble " << currentTest; +} + +void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected) { + u8 code[4096]; + memset(code, 0xcc, sizeof(code)); + char str[4096] = {0}; + + if (!expected) return; + currentTest = description; + xSetPtr(code); + exec(code); + char *strPtr = str; + for (u8* ptr = code; ptr < xGetPtr(); ptr++) { + sprintf(strPtr, "%02x ", *ptr); + strPtr += 3; + } + if (strPtr != str) { + // Remove final space + *--strPtr = '\0'; + } + EXPECT_STRCASEEQ(expected, str) << "Unexpected codegen from " << description; +} diff --git a/tests/ctest/x86emitter/codegen_tests.h b/tests/ctest/x86emitter/codegen_tests.h new file mode 100644 index 0000000000..e785666562 --- /dev/null +++ b/tests/ctest/x86emitter/codegen_tests.h @@ -0,0 +1,29 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2020 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. 
diff --git a/tests/ctest/x86emitter/codegen_tests.h b/tests/ctest/x86emitter/codegen_tests.h
new file mode 100644
index 0000000000..e785666562
--- /dev/null
+++ b/tests/ctest/x86emitter/codegen_tests.h
@@ -0,0 +1,29 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2020 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected);
+
+// Use null to skip, empty string to expect no output
+#ifdef __M_X86_64
+# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected64)
+# define CODEGEN_TEST_64(command, expected) CODEGEN_TEST(command, nullptr, expected)
+# define CODEGEN_TEST_32(command, expected)
+#else
+# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected32)
+# define CODEGEN_TEST_64(command, expected)
+# define CODEGEN_TEST_32(command, expected) CODEGEN_TEST(command, expected, nullptr)
+#endif
+
+#define CODEGEN_TEST_BOTH(command, expected) CODEGEN_TEST(command, expected, expected)
diff --git a/tests/ctest/x86emitter/codegen_tests_main.cpp b/tests/ctest/x86emitter/codegen_tests_main.cpp
new file mode 100644
index 0000000000..8ca63be930
--- /dev/null
+++ b/tests/ctest/x86emitter/codegen_tests_main.cpp
@@ -0,0 +1,161 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2020 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "codegen_tests.h"
+#include <gtest/gtest.h>
+#include <x86emitter/x86emitter.h>
+#include <cstdio>
+
+using namespace x86Emitter;
+
+TEST(CodegenTests, MOVTest)
+{
+	CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
+	CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8");
+	CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
+	CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
+	CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
+	CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01");
+	CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
+	CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03");
+	CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rax], 0), "c7 00 00 00 00 00");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], -1), "c7 44 98 03 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV(rax, 0xffffffff), "b8 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV(r8, -1), "49 c7 c0 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV64(rax, 0x1234567890), "48 b8 90 78 56 34 12 00 00 00");
+	CODEGEN_TEST_64(xMOV64(r8, 0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
+	CODEGEN_TEST_64(xMOV(ptr32[base], 0x12), "c7 05 f6 ff ff ff 12 00 00 00");
+}
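A note on the expected strings for the ptr32[base] and ptrNative[base] cases here and in the tests below: base is the start of the test buffer, and an x86-64 RIP-relative disp32 is measured from the end of the instruction. Worked through for the last MOV above:

    // xMOV(ptr32[base], 0x12) encodes as c7 05 <disp32> <imm32>, 10 bytes total.
    // The instruction starts at base, so the next instruction begins at base+10:
    //   disp32 = base - (base + 10) = -10 = 0xfffffff6  ->  "f6 ff ff ff"
    // The 7- and 6-byte base-relative instructions below give "f9 ff ff ff"
    // and "fa ff ff ff" by the same arithmetic.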
+
+TEST(CodegenTests, LEATest)
+{
+	CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
+	CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
+	CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
+	CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
+	CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
+	CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
+	CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
+	CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
+	CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
+}
+
+TEST(CodegenTests, PUSHTest)
+{
+	CODEGEN_TEST_BOTH(xPUSH(rax), "50");
+	CODEGEN_TEST_64(xPUSH(r8), "41 50");
+	CODEGEN_TEST_BOTH(xPUSH(0x1234), "68 34 12 00 00");
+	CODEGEN_TEST_BOTH(xPUSH(0x12), "6a 12");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax]), "ff 30");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r8]), "41 ff 30");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*2+3+rbx]), "ff 74 43 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[rax*2+3+r8]), "41 ff 74 40 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r9*4+3+r8]), "43 ff 74 88 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r8*4+3+rax]), "42 ff 74 80 03");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*8+0x1234+rbx]), "ff b4 c3 34 12 00 00");
+	CODEGEN_TEST_64(xPUSH(ptrNative[base]), "ff 35 fa ff ff ff");
+	CODEGEN_TEST(xPUSH(ptrNative[(void*)0x1234]), "ff 35 34 12 00 00", "ff 34 25 34 12 00 00");
+}
+
+TEST(CodegenTests, POPTest)
+{
+	CODEGEN_TEST_BOTH(xPOP(rax), "58");
+	CODEGEN_TEST_64(xPOP(r8), "41 58");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax]), "8f 00");
+	CODEGEN_TEST_64(xPOP(ptrNative[r8]), "41 8f 00");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*2+3+rbx]), "8f 44 43 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[rax*2+3+r8]), "41 8f 44 40 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[r9*4+3+r8]), "43 8f 44 88 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[r8*4+3+rax]), "42 8f 44 80 03");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*8+0x1234+rbx]), "8f 84 c3 34 12 00 00");
+	CODEGEN_TEST_64(xPOP(ptrNative[base]), "8f 05 fa ff ff ff");
+	CODEGEN_TEST(xPOP(ptrNative[(void*)0x1234]), "8f 05 34 12 00 00", "8f 04 25 34 12 00 00");
+}
+
+TEST(CodegenTests, MathTest)
+{
+	CODEGEN_TEST(xINC(eaxd), "40", "ff c0");
+	CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
+	CODEGEN_TEST_64(xINC(r8), "49 ff c0");
+	CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");
+	CODEGEN_TEST_64(xADD(r8, 0x12), "49 83 c0 12");
+	CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
+	CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
+	CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
+	CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8");
+	CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00");
+	CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
+	CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
+	CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03");
+	CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03");
+	CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
+	CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
+	CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88");
+	CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
+	CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
+	CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9");
+}
+
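The REX prefixes sprinkled through these expected strings follow directly from the standard x86-64 encoding rules; a small sketch of the computation (standard architecture facts, not the emitter's actual code):

    // REX = 0100WRXB: W selects 64-bit operand size, R extends ModRM.reg,
    // X extends the SIB index, B extends ModRM.rm or the SIB base.
    u8 rex(bool w, bool r, bool x, bool b)
    {
        return 0x40 | (w << 3) | (r << 2) | (x << 1) | (int)b;
    }
    // xMOV(r8, r9) is mov r8, r9: opcode 89 /r with reg=r9, rm=r8, so
    // rex(1, 1, 0, 1) == 0x4d, giving the "4d 89 c8" seen in MOVTest.
    // xPUSH(r8) defaults to 64-bit operands, so only REX.B is needed:
    // rex(0, 0, 0, 1) == 0x41, giving the "41 50" seen in PUSHTest.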
+TEST(CodegenTests, BitwiseTest)
+{
+	CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
+	CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
+	CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9");
+	CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
+	CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
+	CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e");
+	CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e");
+	CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
+	CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
+	CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
+	CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18");
+	CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
+	CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
+	CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");
+}
+
+TEST(CodegenTests, JmpTest)
+{
+	CODEGEN_TEST_64(xJMP(r8), "41 ff e0");
+	CODEGEN_TEST_BOTH(xJMP(rdi), "ff e7");
+	CODEGEN_TEST_BOTH(xJMP(ptrNative[rax]), "ff 20");
+	CODEGEN_TEST_BOTH(xJA(base), "77 fe");
+	CODEGEN_TEST_BOTH(xJB((char*)base - 0xFFFF), "0f 82 fb ff fe ff");
+}
+
+TEST(CodegenTests, SSETest)
+{
+	CODEGEN_TEST_BOTH(xMOVAPS(xmm0, xmm1), "0f 28 c1");
+	CODEGEN_TEST_64(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
+	CODEGEN_TEST_64(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
+	CODEGEN_TEST_64(xMOVAPS(ptr128[rax+r9], xmm8), "46 0f 29 04 08");
+	CODEGEN_TEST_BOTH(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
+	CODEGEN_TEST_64(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
+	CODEGEN_TEST_64(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
+}
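The suite is straightforward to extend in the same style. For instance, a sketch of a test for xCMP (assuming it goes through the same Group1 path as the xADD/xSUB cases above; the expected bytes are standard encodings, worth double-checking against a disassembler before committing):

    TEST(CodegenTests, CMPTest)
    {
        CODEGEN_TEST_BOTH(xCMP(eaxd, ecxd), "39 c8");
        CODEGEN_TEST_64(xCMP(rax, r8), "4c 39 c0");
    }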