From 4f56db9f1844055967cd3aee8bf64613f2878515 Mon Sep 17 00:00:00 2001 From: tellowkrinkle Date: Wed, 19 Aug 2020 03:19:28 -0500 Subject: [PATCH] Fix codegen on x86-64 (#3512) Fix codegen on x86-64 Part 1 of the changes being worked on in #3451 Makes x86emitter emit the x86-64 machine code you would expect it to Also adds some unit tests to verify that things are working --- .gitmodules | 3 + 3rdparty/gtest | 1 + CMakeLists.txt | 6 + cmake/BuildParameters.cmake | 3 +- cmake/SearchForStuff.cmake | 8 + common/include/Pcsx2Defs.h | 4 - common/include/x86emitter/implement/jmpcall.h | 133 ++------ common/include/x86emitter/implement/movs.h | 18 +- common/include/x86emitter/instructions.h | 3 + common/include/x86emitter/internal.h | 24 +- common/include/x86emitter/x86types.h | 137 +++++--- common/src/x86emitter/groups.cpp | 6 +- common/src/x86emitter/jmp.cpp | 102 +++++- common/src/x86emitter/movs.cpp | 91 ++++-- common/src/x86emitter/x86emitter.cpp | 304 +++++++++++------- tests/ctest/CMakeLists.txt | 12 + tests/ctest/x86emitter/CMakeLists.txt | 1 + tests/ctest/x86emitter/codegen_tests.cpp | 48 +++ tests/ctest/x86emitter/codegen_tests.h | 29 ++ tests/ctest/x86emitter/codegen_tests_main.cpp | 161 ++++++++++ 20 files changed, 784 insertions(+), 310 deletions(-) create mode 160000 3rdparty/gtest create mode 100644 tests/ctest/CMakeLists.txt create mode 100644 tests/ctest/x86emitter/CMakeLists.txt create mode 100644 tests/ctest/x86emitter/codegen_tests.cpp create mode 100644 tests/ctest/x86emitter/codegen_tests.h create mode 100644 tests/ctest/x86emitter/codegen_tests_main.cpp diff --git a/.gitmodules b/.gitmodules index 39a8fed5ca..93248523d1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "3rdparty/xz/xz"] path = 3rdparty/xz/xz url = https://github.com/PCSX2/xz.git +[submodule "3rdparty/gtest"] + path = 3rdparty/gtest + url = https://github.com/google/googletest.git diff --git a/3rdparty/gtest b/3rdparty/gtest new file mode 160000 index 0000000000..703bd9caab --- /dev/null +++ b/3rdparty/gtest @@ -0,0 +1 @@ +Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e diff --git a/CMakeLists.txt b/CMakeLists.txt index bd74628c85..fa21c5161e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,12 @@ if(EXISTS "${CMAKE_SOURCE_DIR}/plugins") add_subdirectory(plugins) endif() +# tests +if(ACTUALLY_ENABLE_TESTS) + add_subdirectory(3rdparty/gtest EXCLUDE_FROM_ALL) + add_subdirectory(tests/ctest) +endif() + #------------------------------------------------------------------------------- # Install some files to ease package creation diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index e1ca1ee922..e59ed0961d 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -21,6 +21,7 @@ # Misc option #------------------------------------------------------------------------------- option(DISABLE_BUILD_DATE "Disable including the binary compile date") +option(ENABLE_TESTS "Enables building the unit tests" ON) if(DISABLE_BUILD_DATE OR openSUSE) message(STATUS "Disabling the inclusion of the binary compile date.") @@ -241,7 +242,7 @@ elseif(${PCSX2_TARGET_ARCHITECTURES} MATCHES "x86_64") set(ARCH_FLAG "-march=native") endif() endif() - add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1) + add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1 -D__M_X86_64=1) set(_ARCH_64 1) set(_M_X86 1) set(_M_X86_64 1) diff --git a/cmake/SearchForStuff.cmake b/cmake/SearchForStuff.cmake index 781cc6d321..e9f5d6ec18 100644 --- a/cmake/SearchForStuff.cmake 
+++ b/cmake/SearchForStuff.cmake @@ -197,6 +197,14 @@ if(HarfBuzz_FOUND) include_directories(${HarfBuzz_INCLUDE_DIRS}) endif() +set(ACTUALLY_ENABLE_TESTS ${ENABLE_TESTS}) +if(ENABLE_TESTS) + if(NOT EXISTS "${CMAKE_SOURCE_DIR}/3rdparty/gtest/CMakeLists.txt") + message(WARNING "ENABLE_TESTS was on but gtest was not found, unit tests will not be enabled") + set(ACTUALLY_ENABLE_TESTS Off) + endif() +endif() + #---------------------------------------- # Use project-wide include directories #---------------------------------------- diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index 02a931a464..e941b0ae20 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -241,7 +241,3 @@ static const int __pagesize = PCSX2_PAGESIZE; #define __fc __fastcall #endif - -#if defined(__x86_64__) || defined(_M_AMD64) -#define __M_X86_64 -#endif diff --git a/common/include/x86emitter/implement/jmpcall.h b/common/include/x86emitter/implement/jmpcall.h index bf69cf43ad..ffc45066ab 100644 --- a/common/include/x86emitter/implement/jmpcall.h +++ b/common/include/x86emitter/implement/jmpcall.h @@ -27,8 +27,12 @@ struct xImpl_JmpCall { bool isJmp; - void operator()(const xRegisterInt &absreg) const; - void operator()(const xIndirect64orLess &src) const; + void operator()(const xAddressReg &absreg) const; + void operator()(const xIndirectNative &src) const; +#ifdef __M_X86_64 + [[deprecated]] // Should move to xIndirectNative + void operator()(const xIndirect32 &absreg) const; +#endif // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. @@ -41,6 +45,7 @@ struct xImpl_JmpCall // always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic). sptr dest = (sptr)func - ((sptr)xGetPtr() + 5); + pxAssertMsg(dest == (s32)dest, "Indirect jump is too far, must use a register!"); xWrite8(0xe8); xWrite32(dest); } @@ -56,131 +61,43 @@ struct xImpl_FastCall // FIXME: current 64 bits is mostly a copy/past potentially it would require to push/pop // some registers. But I think it is enough to handle the first call. 
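For context on the rewritten declarations that follow: on x86-64 the first two integer arguments travel in different registers depending on the ABI (RCX/RDX on Win64, RDI/RSI on System V), which is what the arg1reg/arg2reg constants defined later in this patch capture. A minimal standalone sketch of that selection, with the hypothetical abiArgRegs() helper standing in for the emitter's constants:

#include <cstdio>

// Sketch: ABI-dependent choice of the first two integer argument
// registers, mirroring the arg1reg/arg2reg constants this patch adds.
// Ids follow the emitter's numbering (rax=0, rcx=1, rdx=2, rbx=3,
// rsp=4, rbp=5, rsi=6, rdi=7, r8=8, ...).
struct AbiArgRegs { int arg1, arg2; };

constexpr AbiArgRegs abiArgRegs()
{
#ifdef _WIN32
    return {1, 2}; // Win64: rcx, rdx
#else
    return {7, 6}; // System V (Linux/macOS): rdi, rsi
#endif
}

int main()
{
    const AbiArgRegs r = abiArgRegs();
    std::printf("arg1 reg id = %d, arg2 reg id = %d\n", r.arg1, r.arg2);
}

The swap case handled by prepareRegsForFastcall in jmp.cpp exists for the same reason: if a1 already sits in arg2reg and a2 in arg1reg, naive moves would clobber one argument before it is read.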
+ void operator()(void *f, const xRegister32 &a1 = xEmptyReg, const xRegister32 &a2 = xEmptyReg) const; + + void operator()(void *f, u32 a1, const xRegister32 &a2) const; + void operator()(void *f, const xIndirect32 &a1) const; + void operator()(void *f, u32 a1, u32 a2) const; -// Type unsafety is nice #ifdef __M_X86_64 - -#define XFASTCALL \ - xCALL(f); - -#define XFASTCALL1 \ - xMOV(rdi, a1); \ - xCALL(f); - -#define XFASTCALL2 \ - xMOV(rdi, a1); \ - xMOV(rsi, a2); \ - xCALL(f); - -#else - -#define XFASTCALL \ - xCALL(f); - -#define XFASTCALL1 \ - xMOV(ecx, a1); \ - xCALL(f); - -#define XFASTCALL2 \ - xMOV(ecx, a1); \ - xMOV(edx, a2); \ - xCALL(f); - + void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const; + void operator()(void *f, u32 a1, const xRegisterLong &a2) const; + [[deprecated]] // Switch to xIndirect32, as the size of this isn't obvious #endif + void operator()(void *f, const xIndirectVoid &a1) const; - void operator()(void *f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const + template + __fi void operator()(T *func, u32 a1, const xRegisterLong &a2 = xEmptyReg) const { -#ifdef __M_X86_64 - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#else - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#endif + (*this)((void *)func, a1, a2); } template - __fi void operator()(T *func, u32 a1, const xRegisterLong &a2) const + __fi void operator()(T *func, const xIndirect32 &a1) const { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL2; -#else - XFASTCALL2; -#endif - } - - template - __fi void operator()(T *func, const xIndirectVoid &a1) const - { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL1; -#else - XFASTCALL1; -#endif + (*this)((void*)func, a1); } template __fi void operator()(T *func, u32 a1, u32 a2) const { - void *f = (void *)func; - -#ifdef __M_X86_64 - XFASTCALL2; -#else - XFASTCALL2; -#endif + (*this)((void*)func, a1, a2); } - template - __fi void operator()(T *func, u32 a1) const - { - void *f = (void *)func; - #ifdef __M_X86_64 - XFASTCALL1; -#else - XFASTCALL1; + [[deprecated]] // Switch to xIndirectNative + void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const; #endif - } - void operator()(const xIndirect32 &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const - { -#ifdef __M_X86_64 - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#else - if (a1.IsEmpty()) { - XFASTCALL; - } else if (a2.IsEmpty()) { - XFASTCALL1; - } else { - XFASTCALL2; - } -#endif - } - -#undef XFASTCALL -#undef XFASTCALL1 -#undef XFASTCALL2 + void operator()(const xIndirectNative &f, const xRegisterLong &a1 = xEmptyReg, const xRegisterLong &a2 = xEmptyReg) const; }; } // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/movs.h b/common/include/x86emitter/implement/movs.h index d159a0ad90..598afca20b 100644 --- a/common/include/x86emitter/implement/movs.h +++ b/common/include/x86emitter/implement/movs.h @@ -33,8 +33,8 @@ struct xImpl_Mov void operator()(const xRegisterInt &to, const xRegisterInt &from) const; void operator()(const xIndirectVoid &dest, const xRegisterInt &from) const; void operator()(const xRegisterInt &to, const xIndirectVoid &src) const; - void operator()(const xIndirect64orLess &dest, int imm) const; - void 
operator()(const xRegisterInt &to, int imm, bool preserve_flags = false) const; + void operator()(const xIndirect64orLess &dest, sptr imm) const; + void operator()(const xRegisterInt &to, sptr imm, bool preserve_flags = false) const; #if 0 template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg& immOrReg ) const @@ -70,6 +70,20 @@ struct xImpl_Mov #endif }; +#ifdef __M_X86_64 +// -------------------------------------------------------------------------------------- +// xImpl_MovImm64 +// -------------------------------------------------------------------------------------- +// Mov with 64-bit immediates (only available on 64-bit platforms) +// +struct xImpl_MovImm64 +{ + xImpl_MovImm64() {} // Satisfy GCC's whims. + + void operator()(const xRegister64 &to, s64 imm, bool preserve_flags = false) const; +}; +#endif + // -------------------------------------------------------------------------------------- // xImpl_CMov // -------------------------------------------------------------------------------------- diff --git a/common/include/x86emitter/instructions.h b/common/include/x86emitter/instructions.h index 9f9a1ff435..ce78611b4b 100644 --- a/common/include/x86emitter/instructions.h +++ b/common/include/x86emitter/instructions.h @@ -57,6 +57,9 @@ extern const xImpl_G1Compare xCMP; // flags. extern const xImpl_Mov xMOV; +#ifdef __M_X86_64 +extern const xImpl_MovImm64 xMOV64; +#endif extern const xImpl_Test xTEST; extern const xImpl_Group2 xROL, xROR, diff --git a/common/include/x86emitter/internal.h b/common/include/x86emitter/internal.h index aa5682d046..231630ea43 100644 --- a/common/include/x86emitter/internal.h +++ b/common/include/x86emitter/internal.h @@ -25,12 +25,12 @@ namespace x86Emitter #define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from) extern void SimdPrefix(u8 prefix, u16 opcode); -extern void EmitSibMagic(uint regfield, const void *address); -extern void EmitSibMagic(uint regfield, const xIndirectVoid &info); -extern void EmitSibMagic(uint reg1, const xRegisterBase ®2); -extern void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2); -extern void EmitSibMagic(const xRegisterBase ®1, const void *src); -extern void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib); +extern void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset = 0); +extern void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset = 0); +extern void EmitSibMagic(uint reg1, const xRegisterBase ®2, int = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2, int = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const void *src, int extraRIPOffset = 0); +extern void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib, int extraRIPOffset = 0); extern void EmitRex(uint regfield, const void *address); extern void EmitRex(uint regfield, const xIndirectVoid &info); @@ -49,7 +49,7 @@ inline void xWrite(T val) } template -__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 ¶m2) +__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 ¶m2, int extraRIPOffset = 0) { if (prefix != 0) xWrite8(prefix); @@ -57,7 +57,7 @@ __emitinline void xOpWrite(u8 prefix, u8 opcode, const T1 ¶m1, const T2 &par xWrite8(opcode); - EmitSibMagic(param1, param2); + EmitSibMagic(param1, param2, extraRIPOffset); } template @@ -96,7 +96,13 @@ __emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1 ¶m1, const T2 & template __emitinline void xOpWrite0F(u8 prefix, u16 opcode, 
const T1 ¶m1, const T2 ¶m2, u8 imm8) { - xOpWrite0F(prefix, opcode, param1, param2); + if (prefix != 0) + xWrite8(prefix); + EmitRex(param1, param2); + + SimdPrefix(0, opcode); + + EmitSibMagic(param1, param2, 1); xWrite8(imm8); } diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index 4854c1aacf..155f8a37c8 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -181,6 +181,8 @@ enum SSE2_ComparisonType { static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) +static const int Sib_EIZ = 4; // same index value as ESP (used in Index field) +static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field) extern void xSetPtr(void *ptr); extern void xAlignPtr(uint bytes); @@ -210,9 +212,20 @@ public: xWrite8(0x66); } + int GetImmSize() const { + switch (GetOperandSize()) { + case 1: return 1; + case 2: return 2; + case 4: return 4; + case 8: return 4; // Only mov's take 64-bit immediates + jNO_DEFAULT + } + return 0; + } + void xWriteImm(int imm) const { - switch (GetOperandSize()) { + switch (GetImmSize()) { case 1: xWrite8(imm); break; @@ -222,9 +235,6 @@ public: case 4: xWrite32(imm); break; - case 8: - xWrite64(imm); - break; jNO_DEFAULT } @@ -315,6 +325,9 @@ public: { } + /// Get a non-wide version of the register (for use with e.g. mov, where `mov eax, 3` and `mov rax, 3` are functionally identical but `mov eax, 3` is shorter) + virtual const xRegisterInt& GetNonWide() const = 0; + bool operator==(const xRegisterInt &src) const { return Id == src.Id && (GetOperandSize() == src.GetOperandSize()); } bool operator!=(const xRegisterInt &src) const { return !operator==(src); } }; @@ -336,7 +349,8 @@ public: { } - virtual uint GetOperandSize() const { return 1; } + virtual uint GetOperandSize() const override { return 1; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister8 &src) const { return Id == src.Id; } bool operator!=(const xRegister8 &src) const { return Id != src.Id; } @@ -356,7 +370,8 @@ public: { } - virtual uint GetOperandSize() const { return 2; } + virtual uint GetOperandSize() const override { return 2; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister16 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister16 &src) const { return this->Id != src.Id; } @@ -376,7 +391,8 @@ public: { } - virtual uint GetOperandSize() const { return 4; } + virtual uint GetOperandSize() const override { return 4; } + virtual const xRegisterInt& GetNonWide() const override { return *this; } bool operator==(const xRegister32 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister32 &src) const { return this->Id != src.Id; } @@ -386,17 +402,21 @@ class xRegister64 : public xRegisterInt { typedef xRegisterInt _parent; + xRegister32 m_nonWide; public: xRegister64() : _parent() + , m_nonWide() { } explicit xRegister64(int regId) : _parent(regId) + , m_nonWide(regId) { } - virtual uint GetOperandSize() const { return 8; } + virtual uint GetOperandSize() const override { return 8; } + virtual const xRegisterInt& GetNonWide() const override { return m_nonWide; } bool operator==(const xRegister64 &src) const { return this->Id == src.Id; } bool operator!=(const xRegister64 &src) const { return this->Id != src.Id; } @@ -498,9 +518,9 @@ public: bool 
IsStackPointer() const { return Id == 4; } xAddressVoid operator+(const xAddressReg &right) const; - xAddressVoid operator+(s32 right) const; + xAddressVoid operator+(sptr right) const; xAddressVoid operator+(const void *right) const; - xAddressVoid operator-(s32 right) const; + xAddressVoid operator-(sptr right) const; xAddressVoid operator-(const void *right) const; xAddressVoid operator*(int factor) const; xAddressVoid operator<<(u32 shift) const; @@ -522,6 +542,11 @@ struct xRegisterEmpty return xRegister16(xRegId_Empty); } + operator xRegister32() const + { + return xRegister32(xRegId_Empty); + } + operator xRegisterSSE() const { return xRegisterSSE(xRegId_Empty); @@ -627,6 +652,13 @@ extern const xAddressReg eax, ebx, ecx, edx, esi, edi, ebp, esp; +// Temporary registers to aid the move to x86-64 +extern const xRegister32 + eaxd, ebxd, ecxd, edxd, + esid, edid, ebpd, espd, + r8d, r9d, r10d, r11d, + r12d, r13d, r14d, r15d; + extern const xRegister16 ax, bx, cx, dx, si, di, bp, sp; @@ -635,6 +667,19 @@ extern const xRegister8 al, dl, bl, ah, ch, dh, bh; +extern const xAddressReg + arg1reg, arg2reg, + arg3reg, arg4reg, + calleeSavedReg1, + calleeSavedReg2; + + +extern const xRegister32 + arg1regd, arg2regd, + calleeSavedReg1d, + calleeSavedReg2d; + + // clang-format on extern const xRegisterCL cl; // I'm special! @@ -661,19 +706,19 @@ public: xAddressReg Base; // base register (no scale) xAddressReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) - s32 Displacement; // address displacement // 4B max even on 64 bits + sptr Displacement; // address displacement // 4B max even on 64 bits but keep rest for assertions public: - xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, s32 displacement = 0); + xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0); - xAddressVoid(const xAddressReg &index, int displacement = 0); + xAddressVoid(const xAddressReg &index, sptr displacement = 0); explicit xAddressVoid(const void *displacement); - explicit xAddressVoid(s32 displacement = 0); + explicit xAddressVoid(sptr displacement = 0); public: bool IsByteSizeDisp() const { return is_s8(Displacement); } - xAddressVoid &Add(s32 imm) + xAddressVoid &Add(sptr imm) { Displacement += imm; return *this; @@ -684,13 +729,13 @@ public: __fi xAddressVoid operator+(const xAddressReg &right) const { return xAddressVoid(*this).Add(right); } __fi xAddressVoid operator+(const xAddressVoid &right) const { return xAddressVoid(*this).Add(right); } - __fi xAddressVoid operator+(s32 imm) const { return xAddressVoid(*this).Add(imm); } - __fi xAddressVoid operator-(s32 imm) const { return xAddressVoid(*this).Add(-imm); } + __fi xAddressVoid operator+(sptr imm) const { return xAddressVoid(*this).Add(imm); } + __fi xAddressVoid operator-(sptr imm) const { return xAddressVoid(*this).Add(-imm); } __fi xAddressVoid operator+(const void *addr) const { return xAddressVoid(*this).Add((uptr)addr); } __fi void operator+=(const xAddressReg &right) { Add(right); } - __fi void operator+=(s32 imm) { Add(imm); } - __fi void operator-=(s32 imm) { Add(-imm); } + __fi void operator+=(sptr imm) { Add(imm); } + __fi void operator-=(sptr imm) { Add(-imm); } }; // -------------------------------------------------------------------------------------- @@ -702,7 +747,7 @@ class xAddressInfo : public xAddressVoid typedef xAddressVoid _parent; public: - xAddressInfo(const xAddressReg &base, 
const xAddressReg &index, int factor = 1, s32 displacement = 0) + xAddressInfo(const xAddressReg &base, const xAddressReg &index, int factor = 1, sptr displacement = 0) : _parent(base, index, factor, displacement) { } @@ -710,17 +755,17 @@ public: /*xAddressInfo( const xAddressVoid& src ) : _parent( src ) {}*/ - explicit xAddressInfo(const xAddressReg &index, int displacement = 0) + explicit xAddressInfo(const xAddressReg &index, sptr displacement = 0) : _parent(index, displacement) { } - explicit xAddressInfo(s32 displacement = 0) + explicit xAddressInfo(sptr displacement = 0) : _parent(displacement) { } - static xAddressInfo FromIndexReg(const xAddressReg &index, int scale = 0, s32 displacement = 0); + static xAddressInfo FromIndexReg(const xAddressReg &index, int scale = 0, sptr displacement = 0); public: using _parent::operator+=; @@ -728,7 +773,7 @@ public: bool IsByteSizeDisp() const { return is_s8(Displacement); } - xAddressInfo &Add(s32 imm) + xAddressInfo &Add(sptr imm) { Displacement += imm; return *this; @@ -747,8 +792,8 @@ public: __fi xAddressInfo operator+(const xAddressReg &right) const { return xAddressInfo(*this).Add(right); } __fi xAddressInfo operator+(const xAddressInfo &right) const { return xAddressInfo(*this).Add(right); } - __fi xAddressInfo operator+(s32 imm) const { return xAddressInfo(*this).Add(imm); } - __fi xAddressInfo operator-(s32 imm) const { return xAddressInfo(*this).Add(-imm); } + __fi xAddressInfo operator+(sptr imm) const { return xAddressInfo(*this).Add(imm); } + __fi xAddressInfo operator-(sptr imm) const { return xAddressInfo(*this).Add(-imm); } __fi xAddressInfo operator+(const void *addr) const { return xAddressInfo(*this).Add((uptr)addr); } __fi void operator+=(const xAddressInfo &right) { Add(right); } @@ -765,7 +810,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressVoid &right) return right + addr; } -static __fi xAddressVoid operator+(s32 addr, const xAddressVoid &right) +static __fi xAddressVoid operator+(sptr addr, const xAddressVoid &right) { return right + addr; } @@ -778,7 +823,7 @@ static __fi xAddressInfo operator+(const void *addr, const xAddress } template -static __fi xAddressInfo operator+(s32 addr, const xAddressInfo &right) +static __fi xAddressInfo operator+(sptr addr, const xAddressInfo &right) { return right + addr; } @@ -836,29 +881,31 @@ public: xAddressReg Base; // base register (no scale) xAddressReg Index; // index reg gets multiplied by the scale uint Scale; // scale applied to the index register, in scale/shift form - s32 Displacement; // offset applied to the Base/Index registers. + sptr Displacement; // offset applied to the Base/Index registers. 
// Displacement is 8/32 bits even on x86_64 + // However we need the whole pointer to calculate rip-relative offsets public: - explicit xIndirectVoid(s32 disp); + explicit xIndirectVoid(sptr disp); explicit xIndirectVoid(const xAddressVoid &src); - xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0); + xIndirectVoid(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0); virtual uint GetOperandSize() const; - xIndirectVoid &Add(s32 imm); + xIndirectVoid &Add(sptr imm); bool IsByteSizeDisp() const { return is_s8(Displacement); } bool IsMem() const { return true; } bool IsReg() const { return false; } bool IsExtended() const { return false; } // Non sense but ease template + bool IsWide() const { return GetOperandSize() == 8; } operator xAddressVoid() { return xAddressVoid(Base, Index, Scale, Displacement); } - __fi xIndirectVoid operator+(const s32 imm) const { return xIndirectVoid(*this).Add(imm); } - __fi xIndirectVoid operator-(const s32 imm) const { return xIndirectVoid(*this).Add(-imm); } + __fi xIndirectVoid operator+(const sptr imm) const { return xIndirectVoid(*this).Add(imm); } + __fi xIndirectVoid operator-(const sptr imm) const { return xIndirectVoid(*this).Add(-imm); } protected: void Reduce(); @@ -870,7 +917,7 @@ class xIndirect : public xIndirectVoid typedef xIndirectVoid _parent; public: - explicit xIndirect(s32 disp) + explicit xIndirect(sptr disp) : _parent(disp) { } @@ -878,21 +925,21 @@ public: : _parent(src) { } - xIndirect(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0) + xIndirect(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0) : _parent(base, index, scale, displacement) { } virtual uint GetOperandSize() const { return sizeof(OperandType); } - xIndirect &Add(s32 imm) + xIndirect &Add(sptr imm) { Displacement += imm; return *this; } - __fi xIndirect operator+(const s32 imm) const { return xIndirect(*this).Add(imm); } - __fi xIndirect operator-(const s32 imm) const { return xIndirect(*this).Add(-imm); } + __fi xIndirect operator+(const sptr imm) const { return xIndirect(*this).Add(imm); } + __fi xIndirect operator-(const sptr imm) const { return xIndirect(*this).Add(-imm); } bool operator==(const xIndirect &src) const { @@ -914,6 +961,11 @@ typedef xIndirect xIndirect64; typedef xIndirect xIndirect32; typedef xIndirect xIndirect16; typedef xIndirect xIndirect8; +#ifdef __M_X86_64 +typedef xIndirect xIndirectNative; +#else +typedef xIndirect xIndirectNative; +#endif // -------------------------------------------------------------------------------------- // xIndirect64orLess - base class 64, 32, 16, and 8 bit operand types @@ -952,11 +1004,11 @@ public: protected: //xIndirect64orLess( const xAddressVoid& src ) : _parent( src ) {} - explicit xIndirect64orLess(s32 disp) + explicit xIndirect64orLess(sptr disp) : _parent(disp) { } - xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, s32 displacement = 0) + xIndirect64orLess(xAddressReg base, xAddressReg index, int scale = 0, sptr displacement = 0) : _parent(base, index, scale, displacement) { } @@ -995,6 +1047,7 @@ public: // ptr[] - use this form for instructions which can resolve the address operand size from // the other register operand sizes. 
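The new ptrNative indexer below pairs with the xIndirectNative typedef above: operands that are always pointer-width (call/jmp targets, push/pop) can be written once and come out 64-bit on x86-64 and 32-bit otherwise. A minimal sketch of the same width selection, with a toy Indirect template standing in for the emitter's xIndirect type:

#include <cstdint>
#include <cstdio>
#include <type_traits>

// Toy stand-in for the emitter's indirect operand type; only the
// operand size matters for this illustration.
template <typename OperandType>
struct Indirect {
    static constexpr unsigned operandSize = sizeof(OperandType);
};

// Pointer-width operand type for the build target, analogous to the
// xIndirectNative typedef (u64 under __M_X86_64, u32 otherwise).
using IndirectNative =
    std::conditional_t<sizeof(void *) == 8,
                       Indirect<std::uint64_t>, Indirect<std::uint32_t>>;

int main()
{
    std::printf("native operand size: %u bytes\n", IndirectNative::operandSize);
}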
extern const xAddressIndexer ptr; +extern const xAddressIndexer ptrNative; extern const xAddressIndexer ptr128; extern const xAddressIndexer ptr64; extern const xAddressIndexer ptr32; @@ -1165,7 +1218,7 @@ static __fi xAddressVoid operator+(const void *addr, const xAddressReg ®) return reg + (sptr)addr; } -static __fi xAddressVoid operator+(s32 addr, const xAddressReg ®) +static __fi xAddressVoid operator+(sptr addr, const xAddressReg ®) { return reg + (sptr)addr; } diff --git a/common/src/x86emitter/groups.cpp b/common/src/x86emitter/groups.cpp index 350901a68c..07b783e416 100644 --- a/common/src/x86emitter/groups.cpp +++ b/common/src/x86emitter/groups.cpp @@ -50,7 +50,7 @@ static void _g1_IndirectImm(G1Type InstType, const xIndirect64orLess &sibdest, i xWrite(imm); } else { u8 opcode = is_s8(imm) ? 0x83 : 0x81; - xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest); + xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest, is_s8(imm) ? 1 : sibdest.GetImmSize()); if (is_s8(imm)) xWrite(imm); @@ -156,7 +156,7 @@ void xImpl_Group2::operator()(const xIndirect64orLess &sibdest, u8 imm) const // special encoding of 1's xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xd0 : 0xd1, InstType, sibdest); } else { - xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest); + xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest, 1); xWrite8(imm); } } @@ -195,7 +195,7 @@ static void _imul_ImmStyle(const xRegisterInt ¶m1, const SrcType ¶m2, in { pxAssert(param1.GetOperandSize() == param2.GetOperandSize()); - xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2); + xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2, is_s8(imm) ? 1 : param1.GetImmSize()); if (is_s8(imm)) xWrite8((u8)imm); diff --git a/common/src/x86emitter/jmp.cpp b/common/src/x86emitter/jmp.cpp index a9b878d4a7..3efbff5418 100644 --- a/common/src/x86emitter/jmp.cpp +++ b/common/src/x86emitter/jmp.cpp @@ -34,12 +34,110 @@ namespace x86Emitter { -void xImpl_JmpCall::operator()(const xRegisterInt &absreg) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg); } -void xImpl_JmpCall::operator()(const xIndirect64orLess &src) const { xOpWrite(0, 0xff, isJmp ? 4 : 2, src); } +void xImpl_JmpCall::operator()(const xAddressReg &absreg) const { + // Jumps are always wide and don't need the rex.W + xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg.GetNonWide()); +} +void xImpl_JmpCall::operator()(const xIndirectNative &src) const { + // Jumps are always wide and don't need the rex.W + EmitRex(0, xIndirect32(src.Base, src.Index, 1, 0)); + xWrite8(0xff); + EmitSibMagic(isJmp ? 4 : 2, src); +} +#ifdef __M_X86_64 +void xImpl_JmpCall::operator()(const xIndirect32 &absreg) const { + xOpWrite(0, 0xff, isJmp ? 
4 : 2, absreg); +} +#endif const xImpl_JmpCall xJMP = {true}; const xImpl_JmpCall xCALL = {false}; + +template +void prepareRegsForFastcall(const Reg1 &a1, const Reg2 &a2) { + if (a1.IsEmpty()) return; + + // Make sure we don't mess up if someone tries to fastcall with a1 in arg2reg and a2 in arg1reg + if (a2.Id != arg1reg.Id) { + xMOV(Reg1(arg1reg.Id), a1); + if (!a2.IsEmpty()) { + xMOV(Reg2(arg2reg.Id), a2); + } + } else if (a1.Id != arg2reg.Id) { + xMOV(Reg2(arg2reg.Id), a2); + xMOV(Reg1(arg1reg.Id), a1); + } else { + xPUSH(a1); + xMOV(Reg2(arg2reg.Id), a2); + xPOP(Reg1(arg1reg.Id)); + } +} + +void xImpl_FastCall::operator()(void *f, const xRegister32 &a1, const xRegister32 &a2) const { + prepareRegsForFastcall(a1, a2); + uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f; + if ((sptr)disp == (s32)disp) { + xCALL(f); + } else { + xMOV(rax, ptrNative[f]); + xCALL(rax); + } +} + +#ifdef __M_X86_64 +void xImpl_FastCall::operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f; + if ((sptr)disp == (s32)disp) { + xCALL(f); + } else { + xMOV(rax, ptrNative[f]); + xCALL(rax); + } +} + +void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const { + if (!a2.IsEmpty()) { xMOV(arg2reg, a2); } + xMOV(arg1reg, a1); + (*this)(f, arg1reg, arg2reg); +} +#endif + +void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const { + if (!a2.IsEmpty()) { xMOV(arg2regd, a2); } + xMOV(arg1regd, a1); + (*this)(f, arg1regd, arg2regd); +} + +void xImpl_FastCall::operator()(void *f, const xIndirect32 &a1) const { + xMOV(arg1regd, a1); + (*this)(f, arg1regd); +} + +void xImpl_FastCall::operator()(void *f, const xIndirectVoid &a1) const { + xMOV(arg1regd, a1); + (*this)(f, arg1regd); +} + +void xImpl_FastCall::operator()(void *f, u32 a1, u32 a2) const { + xMOV(arg1regd, a1); + xMOV(arg2regd, a2); + (*this)(f, arg1regd, arg2regd); +} + +#ifdef __M_X86_64 +void xImpl_FastCall::operator()(const xIndirect32 &f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + xCALL(f); +} +#endif + +void xImpl_FastCall::operator()(const xIndirectNative &f, const xRegisterLong &a1, const xRegisterLong &a2) const { + prepareRegsForFastcall(a1, a2); + xCALL(f); +} + const xImpl_FastCall xFastCall = {}; void xSmartJump::SetTarget() diff --git a/common/src/x86emitter/movs.cpp b/common/src/x86emitter/movs.cpp index 231ef879a4..5ff4b36214 100644 --- a/common/src/x86emitter/movs.cpp +++ b/common/src/x86emitter/movs.cpp @@ -56,15 +56,15 @@ void xImpl_Mov::operator()(const xIndirectVoid &dest, const xRegisterInt &from) // mov eax has a special from when writing directly to a DISP32 address // (sans any register index/base registers). +#ifndef __M_X86_64 + // Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr if (from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty()) { -// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally -#ifdef __M_X86_64 - pxAssert(0); -#endif - xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from.Id, dest); + xOpAccWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xa2 : 0xa3, from, dest); xWrite32(dest.Displacement); - } else { - xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from.Id, dest); + } else +#endif + { + xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 
0x88 : 0x89, from, dest); } } @@ -73,40 +73,91 @@ void xImpl_Mov::operator()(const xRegisterInt &to, const xIndirectVoid &src) con // mov eax has a special from when reading directly from a DISP32 address // (sans any register index/base registers). +#ifndef __M_X86_64 + // Note: On x86-64 this is an immediate 64-bit address, which is larger than the equivalent rip offset instr if (to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty()) { -// FIXME: in 64 bits, it could be 8B whereas Displacement is limited to 4B normally -#ifdef __M_X86_64 - pxAssert(0); -#endif xOpAccWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xa0 : 0xa1, to, src); xWrite32(src.Displacement); - } else { + } else +#endif + { xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0x8a : 0x8b, to, src); } } -void xImpl_Mov::operator()(const xIndirect64orLess &dest, int imm) const +void xImpl_Mov::operator()(const xIndirect64orLess &dest, sptr imm) const { - xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest); + switch (dest.GetOperandSize()) { + case 1: + pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!"); + break; + case 2: + pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!"); + break; + case 4: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!"); + break; + case 8: + pxAssertMsg(imm == (s32)imm, "Immediate won't fit in immediate slot, go through a register!"); + break; + default: + pxAssertMsg(0, "Bad indirect size!"); + } + xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest, dest.GetImmSize()); dest.xWriteImm(imm); } // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). -void xImpl_Mov::operator()(const xRegisterInt &to, int imm, bool preserve_flags) const +void xImpl_Mov::operator()(const xRegisterInt &to, sptr imm, bool preserve_flags) const { - if (!preserve_flags && (imm == 0)) - _g1_EmitOp(G1Type_XOR, to, to); - else { + switch (to.GetOperandSize()) { + case 1: + pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!"); + break; + case 2: + pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!"); + break; + case 4: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!"); + break; + case 8: + pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit in immediate slot, use mov64 or lea!"); + break; + default: + pxAssertMsg(0, "Bad indirect size!"); + } + const xRegisterInt& to_ = to.GetNonWide(); + if (!preserve_flags && (imm == 0)) { + _g1_EmitOp(G1Type_XOR, to_, to_); + } else if (imm == (u32)imm || !to.IsWide()) { // Note: MOV does not have (reg16/32,imm8) forms. - u8 opcode = (to.Is8BitOp() ? 0xb0 : 0xb8) | to.Id; - xOpAccWrite(to.GetPrefix16(), opcode, 0, to); + u8 opcode = (to_.Is8BitOp() ? 
0xb0 : 0xb8) | to_.Id; + xOpAccWrite(to_.GetPrefix16(), opcode, 0, to_); + to_.xWriteImm(imm); + } else { + xOpWrite(to.GetPrefix16(), 0xc7, 0, to); to.xWriteImm(imm); } } const xImpl_Mov xMOV; +#ifdef __M_X86_64 +void xImpl_MovImm64::operator()(const xRegister64& to, s64 imm, bool preserve_flags) const +{ + if (imm == (u32)imm || imm == (s32)imm) { + xMOV(to, imm, preserve_flags); + } else { + u8 opcode = 0xb8 | to.Id; + xOpAccWrite(to.GetPrefix16(), opcode, 0, to); + xWrite64(imm); + } +} + +const xImpl_MovImm64 xMOV64; +#endif + // -------------------------------------------------------------------------------------- // CMOVcc // -------------------------------------------------------------------------------------- diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index 2942ab335b..8b6c3e5158 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -97,6 +97,7 @@ __fi void xWrite64(u64 val) // objects be initialized even though they have no actual variable members). const xAddressIndexer ptr = {}; +const xAddressIndexer ptrNative = {}; const xAddressIndexer ptr128 = {}; const xAddressIndexer ptr64 = {}; const xAddressIndexer ptr32 = {}; @@ -135,6 +136,16 @@ const xAddressReg esp(4), ebp(5), esi(6), edi(7); +const xRegister32 + eaxd(0), ebxd(3), + ecxd(1), edxd(2), + espd(4), ebpd(5), + esid(6), edid(7), + r8d(8), r9d(9), + r10d(10), r11d(11), + r12d(12), r13d(13), + r14d(14), r15d(15); + const xRegister16 ax(0), bx(3), cx(1), dx(2), @@ -147,6 +158,41 @@ const xRegister8 ah(4), ch(5), dh(6), bh(7); +#if defined(_WIN32) || !defined(__M_X86_64) +const xAddressReg + arg1reg = rcx, + arg2reg = rdx, +#ifdef __M_X86_64 + arg3reg = r8, + arg4reg = r9, +#else + arg3reg = xRegisterEmpty(), + arg4reg = xRegisterEmpty(), +#endif + calleeSavedReg1 = rdi, + calleeSavedReg2 = rsi; + +const xRegister32 + arg1regd = ecxd, + arg2regd = edxd, + calleeSavedReg1d = edid, + calleeSavedReg2d = esid; +#else +const xAddressReg + arg1reg = rdi, + arg2reg = rsi, + arg3reg = rdx, + arg4reg = rcx, + calleeSavedReg1 = r12, + calleeSavedReg2 = r13; + +const xRegister32 + arg1regd = edid, + arg2regd = esid, + calleeSavedReg1d = r12d, + calleeSavedReg2d = r13d; +#endif + // clang-format on const xRegisterCL cl; @@ -250,16 +296,22 @@ static __fi void SibSB(u32 ss, u32 index, u32 base) xWrite8((ss << 6) | (index << 3) | base); } -void EmitSibMagic(uint regfield, const void *address) +void EmitSibMagic(uint regfield, const void *address, int extraRIPOffset) { - ModRM(0, regfield, ModRm_UseDisp32); - - // SIB encoding only supports 32bit offsets, even on x86_64 - // We must make sure that the displacement is within the 32bit range - // Else we will fail out in a spectacular fashion sptr displacement = (sptr)address; -#ifdef __M_X86_64 - pxAssertDev(displacement >= -0x80000000LL && displacement < 0x80000000LL, "SIB target is too far away, needs an indirect register"); +#ifndef __M_X86_64 + ModRM(0, regfield, ModRm_UseDisp32); +#else + sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset); + // Can we use a rip-relative address? 
(Prefer this over eiz because it's a byte shorter) + if (ripRelative == (s32)ripRelative) { + ModRM(0, regfield, ModRm_UseDisp32); + displacement = ripRelative; + } else { + pxAssertDev(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register"); + ModRM(0, regfield, ModRm_UseSib); + SibSB(0, Sib_EIZ, Sib_UseDisp32); + } #endif xWrite((s32)displacement); @@ -293,7 +345,7 @@ static __fi bool NeedsSibMagic(const xIndirectVoid &info) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // -void EmitSibMagic(uint regfield, const xIndirectVoid &info) +void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset) { // 3 bits also on x86_64 (so max is 8) // We might need to mask it on x86_64 @@ -302,6 +354,8 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) ((info.IsByteSizeDisp()) ? 1 : 2); pxAssert(!info.Base.IsEmpty() || !info.Index.IsEmpty() || displacement_size == 2); + // Displacement is only 64 bits for rip-relative addressing + pxAssert(info.Displacement == (s32)info.Displacement || (info.Base.IsEmpty() && info.Index.IsEmpty())); if (!NeedsSibMagic(info)) { // Use ModRm-only encoding, with the rm field holding an index/base register, if @@ -310,13 +364,13 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) // encoded *with* a displacement of 0, if it would otherwise not have one). if (info.Index.IsEmpty()) { - EmitSibMagic(regfield, (void *)info.Displacement); + EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset); return; } else { if (info.Index == ebp && displacement_size == 0) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - ModRM(displacement_size, regfield, info.Index.Id); + ModRM(displacement_size, regfield, info.Index.Id & 7); } } else { // In order to encode "just" index*scale (and no base), we have to encode @@ -327,7 +381,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) if (info.Base.IsEmpty()) { ModRM(0, regfield, ModRm_UseSib); - SibSB(info.Scale, info.Index.Id, ModRm_UseDisp32); + SibSB(info.Scale, info.Index.Id, Sib_UseDisp32); xWrite(info.Displacement); return; } else { @@ -335,7 +389,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! ModRM(displacement_size, regfield, ModRm_UseSib); - SibSB(info.Scale, info.Index.Id, info.Base.Id); + SibSB(info.Scale, info.Index.Id & 7, info.Base.Id & 7); } } @@ -349,24 +403,24 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info) // Writes a ModRM byte for "Direct" register access forms, which is used for all // instructions taking a form of [reg,reg]. 
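The `Id & 7` masks introduced in the hunks below follow from the instruction format: ModRM and SIB register fields are only 3 bits wide, so for r8-r15 the fourth register bit has to travel in the REX prefix instead (REX.R extends ModRM.reg, REX.X the SIB index, REX.B the rm/base field). A standalone sketch that reproduces one of the new unit tests, xMOV(r8, rax) => 49 89 c0, under these rules:

#include <cstdint>
#include <cstdio>

// Sketch: REX + ModRM for a 64-bit reg-to-reg MOV (opcode 0x89,
// "mov rm64, r64"). Ids 8-15 lift their top bit into REX while the
// ModRM fields keep id & 7, exactly the masking added in this patch.
static void encodeMovRegReg(std::uint8_t out[3], int srcId, int dstId)
{
    const int w = 1;                // REX.W: 64-bit operand size
    const int r = (srcId >> 3) & 1; // REX.R extends ModRM.reg (source)
    const int x = 0;                // REX.X: no SIB index in this form
    const int b = (dstId >> 3) & 1; // REX.B extends ModRM.rm (destination)
    out[0] = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
    out[1] = 0x89;                                        // mov rm, reg
    out[2] = (3 << 6) | ((srcId & 7) << 3) | (dstId & 7); // Mod_Direct
}

int main()
{
    std::uint8_t buf[3];
    encodeMovRegReg(buf, /*rax*/ 0, /*r8*/ 8); // mov r8, rax
    std::printf("%02x %02x %02x\n", (unsigned)buf[0], (unsigned)buf[1],
                (unsigned)buf[2]); // expected: 49 89 c0
}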
-void EmitSibMagic(uint reg1, const xRegisterBase ®2) +void EmitSibMagic(uint reg1, const xRegisterBase ®2, int) { - xWrite8((Mod_Direct << 6) | (reg1 << 3) | reg2.Id); + xWrite8((Mod_Direct << 6) | (reg1 << 3) | (reg2.Id & 7)); } -void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2) +void EmitSibMagic(const xRegisterBase ®1, const xRegisterBase ®2, int) { - xWrite8((Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id); + xWrite8((Mod_Direct << 6) | ((reg1.Id & 7) << 3) | (reg2.Id & 7)); } -void EmitSibMagic(const xRegisterBase ®1, const void *src) +void EmitSibMagic(const xRegisterBase ®1, const void *src, int extraRIPOffset) { - EmitSibMagic(reg1.Id, src); + EmitSibMagic(reg1.Id & 7, src, extraRIPOffset); } -void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib) +void EmitSibMagic(const xRegisterBase ®1, const xIndirectVoid &sib, int extraRIPOffset) { - EmitSibMagic(reg1.Id, sib); + EmitSibMagic(reg1.Id & 7, sib, extraRIPOffset); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -391,10 +445,14 @@ void EmitRex(uint regfield, const void *address) void EmitRex(uint regfield, const xIndirectVoid &info) { - bool w = info.Base.IsWide(); + bool w = info.IsWide(); bool r = false; - bool x = false; - bool b = info.IsExtended(); + bool x = info.Index.IsExtended(); + bool b = info.Base.IsExtended(); + if (!NeedsSibMagic(info)) { + b = x; + x = false; + } EmitRex(w, r, x, b); } @@ -432,6 +490,33 @@ void EmitRex(const xRegisterBase ®1, const xIndirectVoid &sib) bool r = reg1.IsExtended(); bool x = sib.Index.IsExtended(); bool b = sib.Base.IsExtended(); + if (!NeedsSibMagic(sib)) { + b = x; + x = false; + } + EmitRex(w, r, x, b); +} + +// For use by instructions that are implicitly wide +void EmitRexImplicitlyWide(const xRegisterBase ®) +{ + bool w = false; + bool r = false; + bool x = false; + bool b = reg.IsExtended(); + EmitRex(w, r, x, b); +} + +void EmitRexImplicitlyWide(const xIndirectVoid &sib) +{ + bool w = false; + bool r = false; + bool x = sib.Index.IsExtended(); + bool b = sib.Base.IsExtended(); + if (!NeedsSibMagic(sib)) { + b = x; + x = false; + } EmitRex(w, r, x, b); } @@ -459,7 +544,7 @@ __emitinline u8 *xGetPtr() __emitinline void xAlignPtr(uint bytes) { // forward align - x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(bytes - 1)); + x86Ptr = (u8 *)(((uptr)x86Ptr + bytes - 1) & ~(uptr)(bytes - 1)); } // Performs best-case alignment for the target CPU, for use prior to starting a new @@ -506,7 +591,7 @@ xAddressVoid xAddressReg::operator+(const xAddressReg &right) const return xAddressVoid(*this, right); } -xAddressVoid xAddressReg::operator+(s32 right) const +xAddressVoid xAddressReg::operator+(sptr right) const { pxAssertMsg(Id != -1, "Uninitialized x86 register."); return xAddressVoid(*this, right); @@ -518,7 +603,7 @@ xAddressVoid xAddressReg::operator+(const void *right) const return xAddressVoid(*this, (sptr)right); } -xAddressVoid xAddressReg::operator-(s32 right) const +xAddressVoid xAddressReg::operator-(sptr right) const { pxAssertMsg(Id != -1, "Uninitialized x86 register."); return xAddressVoid(*this, -right); @@ -547,7 +632,7 @@ xAddressVoid xAddressReg::operator<<(u32 shift) const // xAddressVoid (method implementations) // -------------------------------------------------------------------------------------- -xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, s32 displacement) +xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, int factor, 
sptr displacement) { Base = base; Index = index; @@ -558,7 +643,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &base, const xAddressReg &index, in pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register."); } -xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement) +xAddressVoid::xAddressVoid(const xAddressReg &index, sptr displacement) { Base = xEmptyReg; Index = index; @@ -568,7 +653,7 @@ xAddressVoid::xAddressVoid(const xAddressReg &index, s32 displacement) pxAssertMsg(index.Id != xRegId_Invalid, "Uninitialized x86 register."); } -xAddressVoid::xAddressVoid(s32 displacement) +xAddressVoid::xAddressVoid(sptr displacement) { Base = xEmptyReg; Index = xEmptyReg; @@ -581,12 +666,7 @@ xAddressVoid::xAddressVoid(const void *displacement) Base = xEmptyReg; Index = xEmptyReg; Factor = 0; -#ifdef __M_X86_64 - pxAssert(0); -//Displacement = (s32)displacement; -#else - Displacement = (s32)displacement; -#endif + Displacement = (sptr)displacement; } xAddressVoid &xAddressVoid::Add(const xAddressReg &src) @@ -643,7 +723,7 @@ xIndirectVoid::xIndirectVoid(const xAddressVoid &src) Reduce(); } -xIndirectVoid::xIndirectVoid(s32 disp) +xIndirectVoid::xIndirectVoid(sptr disp) { Base = xEmptyReg; Index = xEmptyReg; @@ -653,7 +733,7 @@ xIndirectVoid::xIndirectVoid(s32 disp) // no reduction necessary :D } -xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, s32 displacement) +xIndirectVoid::xIndirectVoid(xAddressReg base, xAddressReg index, int scale, sptr displacement) { Base = base; Index = index; @@ -754,7 +834,7 @@ uint xIndirectVoid::GetOperandSize() const return 0; } -xIndirectVoid &xIndirectVoid::Add(s32 imm) +xIndirectVoid &xIndirectVoid::Add(sptr imm) { Displacement += imm; return *this; @@ -775,7 +855,11 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool // See EmitSibMagic for commenting on SIB encoding. - if (!NeedsSibMagic(src)) { + // We should allow native-sized addressing regs (e.g. lea eax, [rax]) + const xRegisterInt& sizeMatchedIndex = to.IsWide() ? src.Index : src.Index.GetNonWide(); + const xRegisterInt& sizeMatchedBase = to.IsWide() ? src.Base : src.Base.GetNonWide(); + + if (!NeedsSibMagic(src) && src.Displacement == (s32)src.Displacement) { // LEA Land: means we have either 1-register encoding or just an offset. // offset is encodable as an immediate MOV, and a register is encodable // as a register MOV. @@ -783,24 +867,17 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool if (src.Index.IsEmpty()) { xMOV(to, src.Displacement); return; - } else if (displacement_size == 0) { - _xMovRtoR(to, src.Index); + } + else if (displacement_size == 0) { + _xMovRtoR(to, sizeMatchedIndex); return; - } else { - if (!preserve_flags) { - // encode as MOV and ADD combo. Make sure to use the immediate on the - // ADD since it can encode as an 8-bit sign-extended value. + } else if (!preserve_flags) { + // encode as MOV and ADD combo. Make sure to use the immediate on the + // ADD since it can encode as an 8-bit sign-extended value. 
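That MOV/ADD substitution is only legal because this branch has already checked !preserve_flags: ADD writes EFLAGS while a real LEA does not, which is why the preserve_flags case falls through to the plain 0x8d encoding at the bottom of the function. A condensed model of the one-register decision in this hunk, with the hypothetical lowerLea() helper standing in for EmitLeaMagic:

#include <cstdio>

// Sketch of the one-register LEA lowering in EmitLeaMagic:
// lea to, [index + disp] has MOV/ADD equivalents, but ADD clobbers
// EFLAGS, so the substitution is gated on preserve_flags.
static const char *lowerLea(long disp, bool preserveFlags)
{
    if (disp == 0)
        return "mov to, index";               // plain copy, flags untouched
    if (!preserveFlags)
        return "mov to, index; add to, disp"; // ADD can take a sign-extended imm8
    return "lea to, [index + disp]";          // real LEA, leaves flags alone
}

int main()
{
    std::printf("%s\n", lowerLea(0, true));   // mov to, index
    std::printf("%s\n", lowerLea(16, false)); // mov to, index; add to, disp
    std::printf("%s\n", lowerLea(16, true));  // lea to, [index + disp]
}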
- _xMovRtoR(to, src.Index); - xADD(to, src.Displacement); - return; - } else { - // note: no need to do ebp+0 check since we encode all 0 displacements as - // register assignments above (via MOV) - - xWrite8(0x8d); - ModRM(displacement_size, to.Id, src.Index.Id); - } + _xMovRtoR(to, sizeMatchedIndex); + xADD(to, src.Displacement); + return; } } else { if (src.Base.IsEmpty()) { @@ -816,49 +893,32 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool xSHL(to, src.Scale); return; } - xWrite8(0x8d); - ModRM(0, to.Id, ModRm_UseSib); - SibSB(src.Scale, src.Index.Id, ModRm_UseDisp32); - xWrite32(src.Displacement); - return; } else { if (src.Scale == 0) { if (!preserve_flags) { if (src.Index == esp) { // ESP is not encodable as an index (ix86 ignores it), thus: - _xMovRtoR(to, src.Base); // will do the trick! + _xMovRtoR(to, sizeMatchedBase); // will do the trick! if (src.Displacement) xADD(to, src.Displacement); return; } else if (src.Displacement == 0) { - _xMovRtoR(to, src.Base); - _g1_EmitOp(G1Type_ADD, to, src.Index); + _xMovRtoR(to, sizeMatchedBase); + _g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex); return; } } else if ((src.Index == esp) && (src.Displacement == 0)) { // special case handling of ESP as Index, which is replaceable with // a single MOV even when preserve_flags is set! :D - _xMovRtoR(to, src.Base); + _xMovRtoR(to, sizeMatchedBase); return; } } - - if (src.Base == ebp && displacement_size == 0) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - xWrite8(0x8d); - ModRM(displacement_size, to.Id, ModRm_UseSib); - SibSB(src.Scale, src.Index.Id, src.Base.Id); } } - if (displacement_size != 0) { - if (displacement_size == 1) - xWrite(src.Displacement); - else - xWrite(src.Displacement); - } + xOpWrite(0, 0x8d, to, src); } __emitinline void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags) @@ -888,7 +948,7 @@ void xImpl_Test::operator()(const xRegisterInt &to, const xRegisterInt &from) co void xImpl_Test::operator()(const xIndirect64orLess &dest, int imm) const { - xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest); + xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xf6 : 0xf7, 0, dest, dest.GetImmSize()); dest.xWriteImm(imm); } @@ -918,12 +978,12 @@ void xImpl_IncDec::operator()(const xRegisterInt &to) const u8 regfield = isDec ? 1 : 0; xOpWrite(to.GetPrefix16(), 0xfe, regfield, to); } else { -#ifdef __M_X86_64 - pxAssertMsg(0, "Single Byte INC/DEC aren't valid in 64 bits." - "You need to use the ModR/M form (FF/0 FF/1 opcodes)"); -#endif + #ifdef __M_X86_64 + xOpWrite(to.GetPrefix16(), 0xff, isDec ? 1 : 0, to); + #else to.prefix16(); xWrite8((isDec ? 
0x48 : 0x40) | to.Id); + #endif } } @@ -977,24 +1037,37 @@ const xImpl_DwordShift xSHRD = {0xac}; __emitinline void xPOP(const xIndirectVoid &from) { + EmitRexImplicitlyWide(from); xWrite8(0x8f); EmitSibMagic(0, from); } __emitinline void xPUSH(const xIndirectVoid &from) { + EmitRexImplicitlyWide(from); xWrite8(0xff); EmitSibMagic(6, from); } -__fi void xPOP(xRegister32or64 from) { xWrite8(0x58 | from->Id); } +__fi void xPOP(xRegister32or64 from) { + EmitRexImplicitlyWide(from); + xWrite8(0x58 | (from->Id & 7)); +} __fi void xPUSH(u32 imm) { - xWrite8(0x68); - xWrite32(imm); + if (is_s8(imm)) { + xWrite8(0x6a); + xWrite8(imm); + } else { + xWrite8(0x68); + xWrite32(imm); + } +} +__fi void xPUSH(xRegister32or64 from) { + EmitRexImplicitlyWide(from); + xWrite8(0x50 | (from->Id & 7)); } -__fi void xPUSH(xRegister32or64 from) { xWrite8(0x50 | from->Id); } // pushes the EFLAGS register onto the stack __fi void xPUSHFD() { xWrite8(0x9C); } @@ -1053,17 +1126,18 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest) ////////////////////////////////////////////////////////////////////////////////////////// // Helper object to handle ABI frame -#ifdef __GNUC__ - #ifdef __M_X86_64 -// GCC ensures/requires stack to be 16 bytes aligned (but when?) + +// All x86-64 calling conventions ensure/require stack to be 16 bytes aligned +// I couldn't find documentation on when, but compilers would indicate it's before the call: https://gcc.godbolt.org/z/KzTfsz #define ALIGN_STACK(v) xADD(rsp, v) -#else + +#elif defined(__GNUC__) + // GCC ensures/requires stack to be 16 bytes aligned before the call // Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes. // EBP will take 4 bytes if m_base_frame is enabled #define ALIGN_STACK(v) xADD(esp, v) -#endif #else @@ -1077,41 +1151,35 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in m_save_base_pointer = save_base_pointer; m_offset = offset; -#ifdef __M_X86_64 - - m_offset += 8; // Call stores the return address (4 bytes) + m_offset += sizeof(void*); // Call stores the return address (4 bytes) // Note rbp can surely be optimized in 64 bits if (m_base_frame) { xPUSH(rbp); xMOV(rbp, rsp); - m_offset += 8; + m_offset += sizeof(void*); } else if (m_save_base_pointer) { xPUSH(rbp); - m_offset += 8; + m_offset += sizeof(void*); } +#ifdef __M_X86_64 + xPUSH(rbx); xPUSH(r12); xPUSH(r13); xPUSH(r14); xPUSH(r15); m_offset += 40; +#ifdef _WIN32 + xPUSH(rdi); + xPUSH(rsi); + xSUB(rsp, 32); // Windows calling convention specifies additional space for the callee to spill registers + m_offset += 48; +#endif #else - m_offset += 4; // Call stores the return address (4 bytes) - - // Create a new frame - if (m_base_frame) { - xPUSH(ebp); - xMOV(ebp, esp); - m_offset += 4; - } else if (m_save_base_pointer) { - xPUSH(ebp); - m_offset += 4; - } - // Save the register context xPUSH(edi); xPUSH(esi); @@ -1130,19 +1198,17 @@ xScopedStackFrame::~xScopedStackFrame() #ifdef __M_X86_64 // Restore the register context +#ifdef _WIN32 + xADD(rsp, 32); + xPOP(rsi); + xPOP(rdi); +#endif xPOP(r15); xPOP(r14); xPOP(r13); xPOP(r12); xPOP(rbx); - // Destroy the frame - if (m_base_frame) { - xLEAVE(); - } else if (m_save_base_pointer) { - xPOP(rbp); - } - #else // Restore the register context @@ -1150,14 +1216,14 @@ xScopedStackFrame::~xScopedStackFrame() xPOP(esi); xPOP(edi); +#endif + // Destroy the frame if (m_base_frame) { xLEAVE(); } else if (m_save_base_pointer) { - xPOP(ebp); + xPOP(rbp); } - -#endif } } // End namespace x86Emitter diff --git 
a/tests/ctest/CMakeLists.txt b/tests/ctest/CMakeLists.txt new file mode 100644 index 0000000000..6d0e19b1ca --- /dev/null +++ b/tests/ctest/CMakeLists.txt @@ -0,0 +1,12 @@ +enable_testing() +add_custom_target(unittests) +add_custom_command(TARGET unittests POST_BUILD COMMAND ${CMAKE_CTEST_COMMAND}) + +macro(add_pcsx2_test target) + add_executable(${target} EXCLUDE_FROM_ALL ${ARGN}) + target_link_libraries(${target} PRIVATE x86emitter gtest_main Utilities) + add_dependencies(unittests ${target}) + add_test(NAME ${target} COMMAND ${target}) +endmacro() + +add_subdirectory(x86emitter) diff --git a/tests/ctest/x86emitter/CMakeLists.txt b/tests/ctest/x86emitter/CMakeLists.txt new file mode 100644 index 0000000000..b3fa050bb6 --- /dev/null +++ b/tests/ctest/x86emitter/CMakeLists.txt @@ -0,0 +1 @@ +add_pcsx2_test(x86emitter_test codegen_tests.cpp codegen_tests_main.cpp codegen_tests.h) diff --git a/tests/ctest/x86emitter/codegen_tests.cpp b/tests/ctest/x86emitter/codegen_tests.cpp new file mode 100644 index 0000000000..607ed40496 --- /dev/null +++ b/tests/ctest/x86emitter/codegen_tests.cpp @@ -0,0 +1,48 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2020 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include +#include + +using namespace x86Emitter; + +thread_local const char *currentTest; + +void pxOnAssert(const DiagnosticOrigin &origin, const wxString &msg) { + FAIL() << "Assertion failed: " << msg + << "\n at " << origin.srcfile << ":" << origin.line << "" + << "\n when trying to assemble " << currentTest; +} + +void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected) { + u8 code[4096]; + memset(code, 0xcc, sizeof(code)); + char str[4096] = {0}; + + if (!expected) return; + currentTest = description; + xSetPtr(code); + exec(code); + char *strPtr = str; + for (u8* ptr = code; ptr < xGetPtr(); ptr++) { + sprintf(strPtr, "%02x ", *ptr); + strPtr += 3; + } + if (strPtr != str) { + // Remove final space + *--strPtr = '\0'; + } + EXPECT_STRCASEEQ(expected, str) << "Unexpected codegen from " << description; +} diff --git a/tests/ctest/x86emitter/codegen_tests.h b/tests/ctest/x86emitter/codegen_tests.h new file mode 100644 index 0000000000..e785666562 --- /dev/null +++ b/tests/ctest/x86emitter/codegen_tests.h @@ -0,0 +1,29 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2020 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. 
diff --git a/tests/ctest/x86emitter/codegen_tests.h b/tests/ctest/x86emitter/codegen_tests.h
new file mode 100644
index 0000000000..e785666562
--- /dev/null
+++ b/tests/ctest/x86emitter/codegen_tests.h
@@ -0,0 +1,29 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2020 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+void runCodegenTest(void (*exec)(void *base), const char* description, const char* expected);
+
+// Use null to skip, empty string to expect no output
+#ifdef __M_X86_64
+# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected64)
+# define CODEGEN_TEST_64(command, expected) CODEGEN_TEST(command, nullptr, expected)
+# define CODEGEN_TEST_32(command, expected)
+#else
+# define CODEGEN_TEST(command, expected32, expected64) runCodegenTest([](void *base){ command; }, #command, expected32)
+# define CODEGEN_TEST_64(command, expected)
+# define CODEGEN_TEST_32(command, expected) CODEGEN_TEST(command, expected, nullptr)
+#endif
+
+#define CODEGEN_TEST_BOTH(command, expected) CODEGEN_TEST(command, expected, expected)
diff --git a/tests/ctest/x86emitter/codegen_tests_main.cpp b/tests/ctest/x86emitter/codegen_tests_main.cpp
new file mode 100644
index 0000000000..8ca63be930
--- /dev/null
+++ b/tests/ctest/x86emitter/codegen_tests_main.cpp
@@ -0,0 +1,161 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2020 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "codegen_tests.h"
+#include <gtest/gtest.h>
+#include <x86emitter/x86emitter.h>
+#include <cstdio>
+
+using namespace x86Emitter;
+
+TEST(CodegenTests, MOVTest)
+{
+	CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
+	CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8");
+	CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
+	CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
+	CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
+	CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01");
+	CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
+	CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
+	CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
+	CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03");
+	CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
+	CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rax], 0), "c7 00 00 00 00 00");
+	CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], -1), "c7 44 98 03 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV(rax, 0xffffffff), "b8 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV(r8, -1), "49 c7 c0 ff ff ff ff");
+	CODEGEN_TEST_64(xMOV64(rax, 0x1234567890), "48 b8 90 78 56 34 12 00 00 00");
+	CODEGEN_TEST_64(xMOV64(r8, 0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
+	CODEGEN_TEST_64(xMOV(ptr32[base], 0x12), "c7 05 f6 ff ff ff 12 00 00 00");
+}
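A note on the expected strings for the ptr32[base] and ptrNative[base] cases here and in the tests below: base is the start of the test buffer, and an x86-64 RIP-relative disp32 is measured from the end of the instruction. Worked through for the last MOV above:

    // xMOV(ptr32[base], 0x12) encodes as c7 05 <disp32> <imm32>, 10 bytes total.
    // The instruction starts at base, so the next instruction begins at base+10:
    //   disp32 = base - (base + 10) = -10 = 0xfffffff6  ->  "f6 ff ff ff"
    // The 7- and 6-byte base-relative instructions below give "f9 ff ff ff"
    // and "fa ff ff ff" by the same arithmetic.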
+
+TEST(CodegenTests, LEATest)
+{
+	CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
+	CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
+	CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
+	CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
+	CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
+	CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
+	CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
+	CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
+	CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
+}
+
+TEST(CodegenTests, PUSHTest)
+{
+	CODEGEN_TEST_BOTH(xPUSH(rax), "50");
+	CODEGEN_TEST_64(xPUSH(r8), "41 50");
+	CODEGEN_TEST_BOTH(xPUSH(0x1234), "68 34 12 00 00");
+	CODEGEN_TEST_BOTH(xPUSH(0x12), "6a 12");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax]), "ff 30");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r8]), "41 ff 30");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*2+3+rbx]), "ff 74 43 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[rax*2+3+r8]), "41 ff 74 40 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r9*4+3+r8]), "43 ff 74 88 03");
+	CODEGEN_TEST_64(xPUSH(ptrNative[r8*4+3+rax]), "42 ff 74 80 03");
+	CODEGEN_TEST_BOTH(xPUSH(ptrNative[rax*8+0x1234+rbx]), "ff b4 c3 34 12 00 00");
+	CODEGEN_TEST_64(xPUSH(ptrNative[base]), "ff 35 fa ff ff ff");
+	CODEGEN_TEST(xPUSH(ptrNative[(void*)0x1234]), "ff 35 34 12 00 00", "ff 34 25 34 12 00 00");
+}
+
+TEST(CodegenTests, POPTest)
+{
+	CODEGEN_TEST_BOTH(xPOP(rax), "58");
+	CODEGEN_TEST_64(xPOP(r8), "41 58");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax]), "8f 00");
+	CODEGEN_TEST_64(xPOP(ptrNative[r8]), "41 8f 00");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*2+3+rbx]), "8f 44 43 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[rax*2+3+r8]), "41 8f 44 40 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[r9*4+3+r8]), "43 8f 44 88 03");
+	CODEGEN_TEST_64(xPOP(ptrNative[r8*4+3+rax]), "42 8f 44 80 03");
+	CODEGEN_TEST_BOTH(xPOP(ptrNative[rax*8+0x1234+rbx]), "8f 84 c3 34 12 00 00");
+	CODEGEN_TEST_64(xPOP(ptrNative[base]), "8f 05 fa ff ff ff");
+	CODEGEN_TEST(xPOP(ptrNative[(void*)0x1234]), "8f 05 34 12 00 00", "8f 04 25 34 12 00 00");
+}
+
+TEST(CodegenTests, MathTest)
+{
+	CODEGEN_TEST(xINC(eaxd), "40", "ff c0");
+	CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
+	CODEGEN_TEST_64(xINC(r8), "49 ff c0");
+	CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");
+	CODEGEN_TEST_64(xADD(r8, 0x12), "49 83 c0 12");
+	CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
+	CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
+	CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
+	CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8");
+	CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00");
+	CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
+	CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
+	CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03");
+	CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03");
+	CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
+	CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
+	CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88");
+	CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
+	CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
+	CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9");
+}
+
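The REX prefixes sprinkled through these expected strings follow directly from the standard x86-64 encoding rules; a small sketch of the computation (standard architecture facts, not the emitter's actual code):

    // REX = 0100WRXB: W selects 64-bit operand size, R extends ModRM.reg,
    // X extends the SIB index, B extends ModRM.rm or the SIB base.
    u8 rex(bool w, bool r, bool x, bool b)
    {
        return 0x40 | (w << 3) | (r << 2) | (x << 1) | (int)b;
    }
    // xMOV(r8, r9) is mov r8, r9: opcode 89 /r with reg=r9, rm=r8, so
    // rex(1, 1, 0, 1) == 0x4d, giving the "4d 89 c8" seen in MOVTest.
    // xPUSH(r8) defaults to 64-bit operands, so only REX.B is needed:
    // rex(0, 0, 0, 1) == 0x41, giving the "41 50" seen in PUSHTest.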
+TEST(CodegenTests, BitwiseTest)
+{
+	CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
+	CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
+	CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9");
+	CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
+	CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
+	CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e");
+	CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e");
+	CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
+	CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
+	CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
+	CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18");
+	CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
+	CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
+	CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");
+}
+
+TEST(CodegenTests, JmpTest)
+{
+	CODEGEN_TEST_64(xJMP(r8), "41 ff e0");
+	CODEGEN_TEST_BOTH(xJMP(rdi), "ff e7");
+	CODEGEN_TEST_BOTH(xJMP(ptrNative[rax]), "ff 20");
+	CODEGEN_TEST_BOTH(xJA(base), "77 fe");
+	CODEGEN_TEST_BOTH(xJB((char*)base - 0xFFFF), "0f 82 fb ff fe ff");
+}
+
+TEST(CodegenTests, SSETest)
+{
+	CODEGEN_TEST_BOTH(xMOVAPS(xmm0, xmm1), "0f 28 c1");
+	CODEGEN_TEST_64(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
+	CODEGEN_TEST_64(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
+	CODEGEN_TEST_64(xMOVAPS(ptr128[rax+r9], xmm8), "46 0f 29 04 08");
+	CODEGEN_TEST_BOTH(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
+	CODEGEN_TEST_64(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
+	CODEGEN_TEST_64(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
+}
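The suite is straightforward to extend in the same style. For instance, a sketch of a test for xCMP (assuming it goes through the same Group1 path as the xADD/xSUB cases above; the expected bytes are standard encodings, worth double-checking against a disassembler before committing):

    TEST(CodegenTests, CMPTest)
    {
        CODEGEN_TEST_BOTH(xCMP(eaxd, ecxd), "39 c8");
        CODEGEN_TEST_64(xCMP(rax, r8), "4c 39 c0");
    }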