From b0d6c29073ca1996c3e130d5b3de922cb9ac671f Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:30:41 +0000 Subject: [PATCH 1/9] JitAsmCommon: Add missing sizes to constant arrays This allows generic code to determine the size of these arrays. --- Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp | 6 +++--- Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 6378fe2860..783e15aa50 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -7,7 +7,7 @@ alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; -alignas(16) const float m_quantizeTableS[] = { +alignas(16) const float m_quantizeTableS[128] = { (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3), (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), @@ -42,7 +42,7 @@ alignas(16) const float m_quantizeTableS[] = { 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), }; -alignas(16) const float m_dequantizeTableS[] = { +alignas(16) const float m_dequantizeTableS[128] = { 1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3), 1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5), @@ -77,4 +77,4 @@ alignas(16) const float m_dequantizeTableS[] = { (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1), }; -alignas(16) const float m_one[] = {1.0f, 0.0f, 0.0f, 0.0f}; +alignas(16) const float m_one[4] = {1.0f, 0.0f, 0.0f, 0.0f}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index f8d997ca57..73c920cf18 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -8,9 +8,9 @@ alignas(16) extern const u8 pbswapShuffle1x4[16]; alignas(16) extern const u8 pbswapShuffle2x4[16]; -alignas(16) extern const float m_one[]; -alignas(16) extern const float m_quantizeTableS[]; -alignas(16) extern const float m_dequantizeTableS[]; +alignas(16) extern const float m_one[4]; +alignas(16) extern const float m_quantizeTableS[128]; +alignas(16) extern const float m_dequantizeTableS[128]; class CommonAsmRoutinesBase { From 9951961338349aba00983668419b72a688e1df5e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:33:22 +0000 Subject: [PATCH 2/9] ConstantPool: Implement a constant pool Constants are copied into this pool so that they live at a memory location that is close to the code that references it. The pool allocates memory from a provided X64CodeBlock to use. 
The purpose of the pool is to overcome the 32-bit offset limitation that RIP-relative addressing has.` --- Source/Core/Core/CMakeLists.txt | 1 + Source/Core/Core/Core.vcxproj | 2 + Source/Core/Core/Core.vcxproj.filters | 6 ++ .../Core/PowerPC/Jit64Common/ConstantPool.cpp | 67 +++++++++++++++++++ .../Core/PowerPC/Jit64Common/ConstantPool.h | 52 ++++++++++++++ 5 files changed, 128 insertions(+) create mode 100644 Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp create mode 100644 Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index c9704450ab..13d55216cc 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -244,6 +244,7 @@ if(_M_X86) PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp PowerPC/Jit64Common/BlockCache.cpp + PowerPC/Jit64Common/ConstantPool.cpp PowerPC/Jit64Common/EmuCodeBlock.cpp PowerPC/Jit64Common/FarCodeCache.cpp PowerPC/Jit64Common/Jit64AsmCommon.cpp diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index 86edd95d87..049aba4703 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -243,6 +243,7 @@ + @@ -484,6 +485,7 @@ + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 18bce72497..e108623f61 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -867,6 +867,9 @@ IOS\USB\Bluetooth + + PowerPC\Jit64Common + @@ -1487,6 +1490,9 @@ IOS + + PowerPC\Jit64Common + diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp new file mode 100644 index 0000000000..20c3f46802 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp @@ -0,0 +1,67 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64Common/ConstantPool.h" + +ConstantPool::ConstantPool(Gen::X64CodeBlock* parent) : m_parent(parent) +{ +} + +ConstantPool::~ConstantPool() = default; + +void ConstantPool::AllocCodeSpace() +{ + _assert_(!m_current_ptr); + Init(); +} + +void ConstantPool::ClearCodeSpace() +{ + Init(); +} + +Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size, + size_t num_elements, size_t index) +{ + const size_t value_size = element_size * num_elements; + auto iter = m_const_info.find(value); + + if (iter == m_const_info.end()) + { + void* ptr = std::align(ALIGNMENT, value_size, m_current_ptr, m_remaining_size); + _assert_msg_(DYNA_REC, ptr, "Constant pool has run out of space."); + + m_current_ptr = static_cast(m_current_ptr) + value_size; + m_remaining_size -= value_size; + + std::memcpy(ptr, value, value_size); + iter = m_const_info.emplace(std::make_pair(value, ConstantInfo{ptr, value_size})).first; + } + + const ConstantInfo& info = iter->second; + _assert_msg_(DYNA_REC, info.m_size == value_size, + "Constant has incorrect size in constant pool."); + u8* location = static_cast(info.m_location); + return Gen::M(location + element_size * index); +} + +void ConstantPool::Init() +{ + // If execution happens to run to the start of the constant pool, halt. + m_parent->INT3(); + m_parent->AlignCode16(); + + // Reserve a block of memory CONST_POOL_SIZE in size. 
+ m_current_ptr = m_parent->GetWritableCodePtr(); + m_parent->ReserveCodeSpace(CONST_POOL_SIZE); + + m_remaining_size = CONST_POOL_SIZE; + m_const_info.clear(); +} diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h new file mode 100644 index 0000000000..eba5cbec0f --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h @@ -0,0 +1,52 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +namespace Gen +{ +struct OpArg; +class X64CodeBlock; +} + +// Constants are copied into this pool so that they live at a memory location +// that is close to the code that references it. This ensures that the 32-bit +// limitation on RIP addressing is not an issue. +class ConstantPool +{ +public: + static constexpr size_t CONST_POOL_SIZE = 1024 * 32; + static constexpr size_t ALIGNMENT = 16; + + explicit ConstantPool(Gen::X64CodeBlock* parent); + ~ConstantPool(); + + // ConstantPool reserves CONST_POOL_SIZE bytes from parent, and uses + // that space to store its constants. + void AllocCodeSpace(); + void ClearCodeSpace(); + + // Copies the value into the pool if it doesn't exist. Returns a pointer + // to existing values if they were already copied. Pointer equality is + // used to determine if two constants are the same. + Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements, + size_t index); + +private: + void Init(); + + struct ConstantInfo + { + void* m_location; + size_t m_size; + }; + + Gen::X64CodeBlock* m_parent; + void* m_current_ptr = nullptr; + size_t m_remaining_size = CONST_POOL_SIZE; + std::map m_const_info; +}; From ff441efc268bb0b042b822d74d73424dedd80625 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:37:25 +0000 Subject: [PATCH 3/9] EmuCodeBlock: Use ConstantPool --- Source/Core/Common/CodeBlock.h | 4 ++-- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 12 ++++++++++++ .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Source/Core/Common/CodeBlock.h b/Source/Core/Common/CodeBlock.h index 7bd554b9aa..28a4919e66 100644 --- a/Source/Core/Common/CodeBlock.h +++ b/Source/Core/Common/CodeBlock.h @@ -42,7 +42,7 @@ public: } // Call this before you generate any code. - void AllocCodeSpace(size_t size, bool need_low = true) + virtual void AllocCodeSpace(size_t size, bool need_low = true) { region_size = size; region = static_cast(Common::AllocateExecutableMemory(region_size, need_low)); @@ -51,7 +51,7 @@ public: // Always clear code space with breakpoints, so that if someone accidentally executes // uninitialized, it just breaks into the debugger. 
- void ClearCodeSpace() + virtual void ClearCodeSpace() { PoisonMemory(); ResetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 353aaed52d..14e06d7b6c 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -40,6 +40,18 @@ OpArg FixImmediate(int access_size, OpArg arg) } } // Anonymous namespace +void EmuCodeBlock::ClearCodeSpace() +{ + X64CodeBlock::ClearCodeSpace(); + m_const_pool.ClearCodeSpace(); +} + +void EmuCodeBlock::AllocCodeSpace(size_t size, bool need_low) +{ + X64CodeBlock::AllocCodeSpace(size + ConstantPool::CONST_POOL_SIZE, need_low); + m_const_pool.AllocCodeSpace(); +} + void EmuCodeBlock::MemoryExceptionCheck() { // TODO: We really should untangle the trampolines, exception handlers and diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 69e6359947..08bafb49d7 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -10,6 +10,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64Common/ConstantPool.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include "Core/PowerPC/Jit64Common/TrampolineInfo.h" @@ -22,12 +23,27 @@ class Mapping; class EmuCodeBlock : public Gen::X64CodeBlock { public: + void ClearCodeSpace() override; + void AllocCodeSpace(size_t size, bool need_low = true) override; + void MemoryExceptionCheck(); // Simple functions to switch between near and far code emitting void SwitchToFarCode(); void SwitchToNearCode(); + template + Gen::OpArg MConst(const T& value) + { + return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0); + } + + template + Gen::OpArg MConst(const T (&value)[N], size_t index = 0) + { + return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index); + } + Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, BitSet32 registers_in_use); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, @@ -105,6 +121,7 @@ public: void Clear(); protected: + ConstantPool m_const_pool{this}; FarCodeCache m_far_code; u8* m_near_code; // Backed up when we switch to far code. 
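
A minimal usage sketch (illustrative, not part of the series): the two MConst overloads added above wrap ConstantPool::GetConstantOpArg, so emitter code can reference a scalar constant, a whole array, or a single array element through the pool instead of raw M(). The table names and the emitter method below are hypothetical.

    // Hypothetical code in a class derived from EmuCodeBlock.
    alignas(16) static const u64 psSignBits[2] = {0x8000000000000000ULL, 0};
    alignas(16) static const float scale_table[4] = {1.0f, 2.0f, 4.0f, 8.0f};

    void SomeEmitter::Example()
    {
      // Array overload: the whole 16-byte constant is copied into the pool on first use.
      XORPD(XMM0, MConst(psSignBits));
      // Indexed form: the returned OpArg points at scale_table[2] inside the pooled copy.
      MULSS(XMM1, MConst(scale_table, 2));
    }

Because the pool lives inside the same code block as the emitted instructions, both references stay within RIP-relative range regardless of where the executable region was allocated.
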
From 0fe234ec9baa66c7c3e992d3f5f1feb6307b0440 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:39:10 +0000 Subject: [PATCH 4/9] Jit_FloatingPoint: Use MConst for constants --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 63e78136ba..a331569470 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -108,7 +108,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re UCOMISD(xmm, R(xmm)); fixups.push_back(J_CC(CC_P)); } - MOVDDUP(xmm, M(psGeneratedQNaN)); + MOVDDUP(xmm, MConst(psGeneratedQNaN)); for (FixupBranch fixup : fixups) SetJumpTarget(fixup); FixupBranch done = J(true); @@ -127,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re SwitchToFarCode(); SetJumpTarget(handle_nan); _assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0"); - BLENDVPD(xmm, M(psGeneratedQNaN)); + BLENDVPD(xmm, MConst(psGeneratedQNaN)); for (u32 x : inputs) { avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD); @@ -151,7 +151,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re SetJumpTarget(handle_nan); MOVAPD(tmp, R(clobber)); ANDNPD(clobber, R(xmm)); - ANDPD(tmp, M(psGeneratedQNaN)); + ANDPD(tmp, MConst(psGeneratedQNaN)); ORPD(tmp, R(clobber)); MOVAPD(xmm, R(tmp)); for (u32 x : inputs) @@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) ADDSD(XMM1, fpr.R(b)); } if (inst.SUBOP5 == 31) // nmadd - XORPD(XMM1, M(packed ? psSignBits2 : psSignBits)); + XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits)); } fpr.BindToRegister(d, !single); if (single) @@ -385,15 +385,15 @@ void Jit64::fsign(UGeckoInstruction inst) { case 40: // neg avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src, - M(packed ? psSignBits2 : psSignBits), packed); + MConst(packed ? psSignBits2 : psSignBits), packed); break; case 136: // nabs - avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), - packed); + avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, + MConst(packed ? psSignBits2 : psSignBits), packed); break; case 264: // abs - avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), - packed); + avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, + MConst(packed ? psAbsMask2 : psAbsMask), packed); break; default: PanicAlert("fsign bleh"); @@ -608,7 +608,7 @@ void Jit64::fctiwx(UGeckoInstruction inst) // The upper 32 bits of the result are set to 0xfff80000, // except for -0.0 where they are set to 0xfff80001 (TODO). 
- MOVAPD(XMM0, M(half_qnan_and_s32_max)); + MOVAPD(XMM0, MConst(half_qnan_and_s32_max)); MINSD(XMM0, fpr.R(b)); switch (inst.SUBOP10) { From 8b93baefb7f111d9fc232282230bc7fca83b304c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:39:47 +0000 Subject: [PATCH 5/9] Jit_SystemRegisters: Use MConst for constants --- Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 2127b1f62d..a9ae18806a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -623,7 +623,7 @@ void Jit64::mcrfs(UGeckoInstruction inst) } AND(32, R(RSCRATCH), Imm32(mask)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); - LEA(64, RSCRATCH, M(m_crTable.data())); + LEA(64, RSCRATCH, MConst(m_crTable)); MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); } @@ -664,7 +664,7 @@ static const u32 s_fpscr_to_mxcsr[] = { // Needs value of FPSCR in RSCRATCH. void Jit64::UpdateMXCSR() { - LEA(64, RSCRATCH2, M(&s_fpscr_to_mxcsr)); + LEA(64, RSCRATCH2, MConst(s_fpscr_to_mxcsr)); AND(32, R(RSCRATCH), Imm32(7)); LDMXCSR(MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0)); } @@ -730,7 +730,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst) // Field 7 contains NI and RN. if (inst.CRFD == 7) - LDMXCSR(M(&s_fpscr_to_mxcsr[imm & 7])); + LDMXCSR(MConst(s_fpscr_to_mxcsr, imm & 7)); } void Jit64::mtfsfx(UGeckoInstruction inst) From 4814c4ac5a26115bb6aac7f131098ab76978888a Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:40:10 +0000 Subject: [PATCH 6/9] EmuCodeBlock: Use MConst for constants --- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 14e06d7b6c..3a35c8dfa7 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -848,16 +848,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg // mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1); if (input.IsSimpleReg() && cpu_info.bAVX) { - VPAND(tmp, input.GetSimpleReg(), M(psRoundBit)); - VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate)); + VPAND(tmp, input.GetSimpleReg(), MConst(psRoundBit)); + VPAND(output, input.GetSimpleReg(), MConst(psMantissaTruncate)); PADDQ(output, R(tmp)); } else { if (!input.IsSimpleReg(output)) MOVAPD(output, input); - avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true); - PAND(output, M(psMantissaTruncate)); + avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), MConst(psRoundBit), true, true); + PAND(output, MConst(psMantissaTruncate)); PADDQ(output, R(tmp)); } } @@ -902,7 +902,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) MOVSD(XMM1, R(src)); // Grab Exponent - PAND(XMM1, M(&double_exponent)); + PAND(XMM1, MConst(double_exponent)); PSRLQ(XMM1, 52); MOVD_xmm(R(RSCRATCH), XMM1); @@ -921,15 +921,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // xmm1 = fraction | 0x0010000000000000 MOVSD(XMM1, R(src)); - PAND(XMM1, M(&double_fraction)); - POR(XMM1, M(&double_explicit_top_bit)); + PAND(XMM1, MConst(double_fraction)); + 
POR(XMM1, MConst(double_explicit_top_bit)); // fraction >> shift PSRLQ(XMM1, R(XMM0)); // OR the sign bit in. MOVSD(XMM0, R(src)); - PAND(XMM0, M(&double_sign_bit)); + PAND(XMM0, MConst(double_sign_bit)); PSRLQ(XMM0, 32); POR(XMM1, R(XMM0)); @@ -942,12 +942,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // We want bits 0, 1 MOVSD(XMM1, R(src)); - PAND(XMM1, M(&double_top_two_bits)); + PAND(XMM1, MConst(double_top_two_bits)); PSRLQ(XMM1, 32); // And 5 through to 34 MOVSD(XMM0, R(src)); - PAND(XMM0, M(&double_bottom_bits)); + PAND(XMM0, MConst(double_bottom_bits)); PSRLQ(XMM0, 29); // OR them togther @@ -979,8 +979,8 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Here, check to see if the source is small enough that it will result in a denormal, and pass it // to the x87 unit // if it is. - avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), M(&double_sign_bit), true, true); - UCOMISD(XMM0, M(&min_norm_single)); + avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), MConst(double_sign_bit), true, true); + UCOMISD(XMM0, MConst(min_norm_single)); FixupBranch nanConversion = J_CC(CC_P, true); FixupBranch denormalConversion = J_CC(CC_B, true); CVTSD2SS(dst, R(src)); @@ -994,7 +994,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) FixupBranch continue1 = J_CC(CC_C, true); // Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by // conversion. - ANDPS(dst, M(&single_qnan_bit)); + ANDPS(dst, MConst(single_qnan_bit)); FixupBranch continue2 = J(true); SetJumpTarget(denormalConversion); @@ -1037,7 +1037,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr SetJumpTarget(nanConversion); TEST(32, R(gprsrc), Imm32(0x00400000)); FixupBranch continue1 = J_CC(CC_NZ, true); - ANDPD(dst, M(&double_qnan_bit)); + ANDPD(dst, MConst(double_qnan_bit)); FixupBranch continue2 = J(true); SwitchToNearCode(); @@ -1069,7 +1069,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) { MOVQ_xmm(R(RSCRATCH), xmm); SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. 
- PTEST(xmm, M(psDoubleExp)); + PTEST(xmm, MConst(psDoubleExp)); FixupBranch maxExponent = J_CC(CC_C); FixupBranch zeroExponent = J_CC(CC_Z); @@ -1079,7 +1079,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) continue1 = J(); SetJumpTarget(maxExponent); - PTEST(xmm, M(psDoubleFrac)); + PTEST(xmm, MConst(psDoubleFrac)); FixupBranch notNAN = J_CC(CC_Z); // Max exponent + mantissa: PPC_FPCLASS_QNAN @@ -1109,10 +1109,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) else { MOVQ_xmm(R(RSCRATCH), xmm); - TEST(64, R(RSCRATCH), M(psDoubleExp)); + TEST(64, R(RSCRATCH), MConst(psDoubleExp)); FixupBranch zeroExponent = J_CC(CC_Z); - AND(64, R(RSCRATCH), M(psDoubleNoSign)); - CMP(64, R(RSCRATCH), M(psDoubleExp)); + AND(64, R(RSCRATCH), MConst(psDoubleNoSign)); + CMP(64, R(RSCRATCH), MConst(psDoubleExp)); FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative FixupBranch infinity = J_CC(CC_E); From 3dccc369d3f377f7a04ea57b8e5d1ca2ac6fc75d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:40:46 +0000 Subject: [PATCH 7/9] Jit64AsmCommon: Use MConst for constants --- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index a98d69551d..876d24c73e 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -180,8 +180,6 @@ void CommonAsmRoutines::GenMfcr() X64Reg tmp = RSCRATCH2; X64Reg cr_val = RSCRATCH_EXTRA; XOR(32, R(dst), R(dst)); - // we only need to zero the high bits of tmp once - XOR(32, R(tmp), R(tmp)); for (int i = 0; i < 8; i++) { static const u32 m_flagTable[8] = {0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9}; @@ -190,9 +188,13 @@ void CommonAsmRoutines::GenMfcr() MOV(64, R(cr_val), PPCSTATE(cr_val[i])); + // Upper bits of tmp need to be zeroed. + // Note: tmp is used later for address calculations and thus + // can't be zero-ed once. This also prevents partial + // register stalls due to SETcc. + XOR(32, R(tmp), R(tmp)); // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); - // FIXME: is there a better way to do this without the partial register merging? 
SETcc(CC_Z, R(tmp)); LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); @@ -204,7 +206,8 @@ void CommonAsmRoutines::GenMfcr() // SO: Bit 61 set; set flag bit 0 // LT: Bit 62 set; set flag bit 3 SHR(64, R(cr_val), Imm8(61)); - OR(32, R(dst), MScaled(cr_val, SCALE_4, PtrOffset(m_flagTable))); + LEA(64, tmp, MConst(m_flagTable)); + OR(32, R(dst), MComplex(tmp, cr_val, SCALE_4, 0)); } RET(); @@ -297,11 +300,12 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); + LEA(64, RSCRATCH, MConst(m_quantizeTableS)); + MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH)); } else if (quantize > 0) { - MULSS(XMM0, M(&m_quantizeTableS[quantize * 2])); + MULSS(XMM0, MConst(m_quantizeTableS, quantize * 2)); } switch (type) @@ -309,20 +313,20 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, case QUANTIZE_U8: XORPS(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(&m_255)); + MINSS(XMM0, MConst(m_255)); break; case QUANTIZE_S8: - MAXSS(XMM0, M(&m_m128)); - MINSS(XMM0, M(&m_127)); + MAXSS(XMM0, MConst(m_m128)); + MINSS(XMM0, MConst(m_127)); break; case QUANTIZE_U16: XORPS(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(m_65535)); + MINSS(XMM0, MConst(m_65535)); break; case QUANTIZE_S16: - MAXSS(XMM0, M(&m_m32768)); - MINSS(XMM0, M(&m_32767)); + MAXSS(XMM0, MConst(m_m32768)); + MINSS(XMM0, MConst(m_32767)); break; default: break; @@ -335,12 +339,13 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); + LEA(64, RSCRATCH, MConst(m_quantizeTableS)); + MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH)); MULPS(XMM0, R(XMM1)); } else if (quantize > 0) { - MOVQ_xmm(XMM1, M(&m_quantizeTableS[quantize * 2])); + MOVQ_xmm(XMM1, MConst(m_quantizeTableS, quantize * 2)); MULPS(XMM0, R(XMM1)); } @@ -358,7 +363,7 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, // is out of int32 range while it's OK for large negatives, it isn't for positives // I don't know whether the overflow actually happens in any games but it potentially can // cause problems, so we need some clamping - MINPS(XMM0, M(m_65535)); + MINPS(XMM0, MConst(m_65535)); CVTTPS2DQ(XMM0, R(XMM0)); switch (type) @@ -419,7 +424,7 @@ void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline) { if (cpu_info.bSSSE3) { - PSHUFB(XMM0, M(pbswapShuffle2x4)); + PSHUFB(XMM0, MConst(pbswapShuffle2x4)); MOVQ_xmm(R(RSCRATCH), XMM0); } else @@ -492,13 +497,14 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); + LEA(64, RSCRATCH, MConst(m_dequantizeTableS)); + MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH)); } else if (quantize > 0) { - MULSS(XMM0, M(&m_dequantizeTableS[quantize * 2])); + MULSS(XMM0, MConst(m_dequantizeTableS, quantize * 2)); } - UNPCKLPS(XMM0, M(m_one)); + UNPCKLPS(XMM0, MConst(m_one)); } else { @@ -564,12 +570,13 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); + LEA(64, RSCRATCH, MConst(m_dequantizeTableS)); + MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH)); MULPS(XMM0, R(XMM1)); } else if (quantize > 
0) { - MOVQ_xmm(XMM1, M(&m_dequantizeTableS[quantize * 2])); + MOVQ_xmm(XMM1, MConst(m_dequantizeTableS, quantize * 2)); MULPS(XMM0, R(XMM1)); } } @@ -597,7 +604,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) else if (cpu_info.bSSSE3) { MOVD_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); - PSHUFB(XMM0, M(pbswapShuffle1x4)); + PSHUFB(XMM0, MConst(pbswapShuffle1x4)); } else { @@ -605,7 +612,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); } - UNPCKLPS(XMM0, M(m_one)); + UNPCKLPS(XMM0, MConst(m_one)); } else { @@ -623,7 +630,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) else if (cpu_info.bSSSE3) { MOVQ_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); - PSHUFB(XMM0, M(pbswapShuffle2x4)); + PSHUFB(XMM0, MConst(pbswapShuffle2x4)); } else { From 9058ccea3fabeac1d558ad5783f50dc724a3163b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 13:49:41 +0000 Subject: [PATCH 8/9] IR_X86: Use MConst for constants --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 97bb83fa9e..b24d16c95a 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -1753,7 +1753,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u32 ssSignBits[4] = {0x80000000}; - Jit->PXOR(reg, M(ssSignBits)); + Jit->PXOR(reg, Jit->MConst(ssSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; @@ -1765,7 +1765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL}; - Jit->PXOR(reg, M(sdSignBits)); + Jit->PXOR(reg, Jit->MConst(sdSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; @@ -1777,7 +1777,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000}; - Jit->PXOR(reg, M(psSignBits)); + Jit->PXOR(reg, Jit->MConst(psSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; From 4491e9b8297a4f554601f7d3167af96e32dd7831 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 20 Mar 2017 19:40:22 +0000 Subject: [PATCH 9/9] Jit_SystemRegisters: Add missing sizes to constant arrays --- Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index a9ae18806a..f60b15488d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -657,7 +657,7 @@ void Jit64::mffsx(UGeckoInstruction inst) } // MXCSR = s_fpscr_to_mxcsr[FPSCR & 7] -static const u32 s_fpscr_to_mxcsr[] = { +static const u32 s_fpscr_to_mxcsr[8] = { 0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80, };
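
One behavioural detail worth keeping in mind when writing new users of this API (a sketch assuming an EmuCodeBlock-derived emitter; the variable names are illustrative): the pool is keyed on the constant's address, not its value, so repeated references to the same object share a single pool slot, while distinct objects with equal contents each get their own copy.

    alignas(16) static const float m_255[4] = {255.0f};

    // First use copies element_size * num_elements = 16 bytes into the pool.
    Gen::OpArg first = MConst(m_255);
    // Second use finds the same key in m_const_info and returns the same pool location.
    Gen::OpArg second = MConst(m_255);
    // A different array with identical contents would still receive its own pool entry;
    // the size assert in GetConstantOpArg only fires if the same address is requested
    // again with a different total size.
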