Merge pull request #5246 from MerryMage/math-util

Jit64AsmCommon: Make frsqrte and fres PIE-compliant
2017-04-11 16:11:19 -04:00 · 2017-04-11 16:11:19 -04:00 · 8ecc5e9b7a
parent 464df471ff f7ed979e30
commit 8ecc5e9b7a
6 changed files with 64 additions and 47 deletions
--- a/Source/Core/Common/MathUtil.cpp
+++ b/Source/Core/Common/MathUtil.cpp
@@ -90,17 +90,16 @@ u32 ClassifyFloat(float fvalue)
  }
 }

-const int frsqrte_expected_base[] = {
-    0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000,
-    0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000,
-    0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000,
-    0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800,
-};
-const int frsqrte_expected_dec[] = {
-    0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2,
-    0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2,
-    0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b,
-};
+const std::array<BaseAndDec, 32> frsqrte_expected = {{
+    {0x3ffa000, 0x7a4}, {0x3c29000, 0x700}, {0x38aa000, 0x670}, {0x3572000, 0x5f2},
+    {0x3279000, 0x584}, {0x2fb7000, 0x524}, {0x2d26000, 0x4cc}, {0x2ac0000, 0x47e},
+    {0x2881000, 0x43a}, {0x2665000, 0x3fa}, {0x2468000, 0x3c2}, {0x2287000, 0x38e},
+    {0x20c1000, 0x35e}, {0x1f12000, 0x332}, {0x1d79000, 0x30a}, {0x1bf4000, 0x2e6},
+    {0x1a7e800, 0x568}, {0x17cb800, 0x4f3}, {0x1552800, 0x48d}, {0x130c000, 0x435},
+    {0x10f2000, 0x3e7}, {0x0eff000, 0x3a2}, {0x0d2e000, 0x365}, {0x0b7c000, 0x32e},
+    {0x09e5000, 0x2fc}, {0x0867000, 0x2d0}, {0x06ff000, 0x2a8}, {0x05ab800, 0x283},
+    {0x046a000, 0x261}, {0x0339800, 0x243}, {0x0218800, 0x226}, {0x0105800, 0x20b},
+}};

 double ApproximateReciprocalSquareRoot(double val)
 {
@@ -154,21 +153,20 @@ double ApproximateReciprocalSquareRoot(double val)
  int i = (int)(mantissa >> 37);
  vali = sign | exponent;
  int index = i / 2048 + (odd_exponent ? 16 : 0);
-  vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
+  auto& entry = frsqrte_expected[index];
+  vali |= (s64)(entry.m_base - entry.m_dec * (i % 2048)) << 26;
  return valf;
 }

-const int fres_expected_base[] = {
-    0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800,
-    0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00,
-    0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800,
-    0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00,
-};
-const int fres_expected_dec[] = {
-    0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245,
-    0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e,
-    0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106,
-};
+const std::array<BaseAndDec, 32> fres_expected = {{
+    {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
+    {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
+    {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
+    {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
+    {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
+    {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
+    {0x041c00, 0x108}, {0x020c00, 0x106},
+}};

 // Used by fres and ps_res.
 double ApproximateReciprocal(double val)
@@ -213,9 +211,9 @@ double ApproximateReciprocal(double val)
  exponent = (0x7FDLL << 52) - exponent;

  int i = (int)(mantissa >> 37);
+  auto& entry = fres_expected[i / 1024];
  vali = sign | exponent;
-  vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
-          << 29;
+  vali |= (s64)(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
  return valf;
 }

--- a/Source/Core/Common/MathUtil.h
+++ b/Source/Core/Common/MathUtil.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <algorithm>
+#include <array>
 #include <cstdlib>
 #include <vector>

@@ -131,10 +132,13 @@ u32 ClassifyDouble(double dvalue);
 // More efficient float version.
 u32 ClassifyFloat(float fvalue);

-extern const int frsqrte_expected_base[];
-extern const int frsqrte_expected_dec[];
-extern const int fres_expected_base[];
-extern const int fres_expected_dec[];
+struct BaseAndDec
+{
+  int m_base;
+  int m_dec;
+};
+extern const std::array<BaseAndDec, 32> frsqrte_expected;
+extern const std::array<BaseAndDec, 32> fres_expected;

 // PowerPC approximation algorithms
 double ApproximateReciprocalSquareRoot(double val);
--- a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp
@@ -7,7 +7,6 @@
 #include <utility>

 #include "Common/Assert.h"
-#include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64Common/ConstantPool.h"

 ConstantPool::ConstantPool() = default;
@@ -37,8 +36,8 @@ void ConstantPool::Shutdown()
  m_const_info.clear();
 }

-Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size,
-                                          size_t num_elements, size_t index)
+const void* ConstantPool::GetConstant(const void* value, size_t element_size, size_t num_elements,
+                                      size_t index)
 {
  const size_t value_size = element_size * num_elements;
  auto iter = m_const_info.find(value);
@@ -59,5 +58,5 @@ Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size
  _assert_msg_(DYNA_REC, info.m_size == value_size,
               "Constant has incorrect size in constant pool.");
  u8* location = static_cast<u8*>(info.m_location);
-  return Gen::M(location + element_size * index);
+  return location + element_size * index;
 }
--- a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h
@@ -7,12 +7,6 @@
 #include <cstddef>
 #include <map>

-namespace Gen
-{
-struct OpArg;
-class X64CodeBlock;
-}
-
 // Constants are copied into this pool so that they live at a memory location
 // that is close to the code that references it. This ensures that the 32-bit
 // limitation on RIP addressing is not an issue.
@@ -32,8 +26,8 @@ public:
  // Copies the value into the pool if it doesn't exist. Returns a pointer
  // to existing values if they were already copied. Pointer equality is
  // used to determine if two constants are the same.
-  Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements,
-                              size_t index);
+  const void* GetConstant(const void* value, size_t element_size, size_t num_elements,
+                          size_t index);

 private:
  struct ConstantInfo
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h
@@ -29,16 +29,22 @@ public:
  void SwitchToFarCode();
  void SwitchToNearCode();

+  template <typename T>
+  const void* GetConstantFromPool(const T& value)
+  {
+    return m_const_pool.GetConstant(&value, sizeof(T), 1, 0);
+  }
+
  template <typename T>
  Gen::OpArg MConst(const T& value)
  {
-    return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0);
+    return Gen::M(GetConstantFromPool(value));
  }

  template <typename T, size_t N>
  Gen::OpArg MConst(const T (&value)[N], size_t index = 0)
  {
-    return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index);
+    return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index));
  }

  Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -69,13 +69,20 @@ void CommonAsmRoutines::GenFrsqrte()
  AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
  XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10));  // int index = i / 2048 + (odd_exponent ? 16 : 0);

+  PUSH(RSCRATCH2);
+  MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(MathUtil::frsqrte_expected)));
+  static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
+
  SHR(64, R(RSCRATCH), Imm8(37));
  AND(32, R(RSCRATCH), Imm32(0x7FF));
-  IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_dec)));
+  IMUL(32, RSCRATCH,
+       MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
  MOV(32, R(RSCRATCH_EXTRA),
-      MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_base)));
+      MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
  SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
  SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
+
+  POP(RSCRATCH2);
  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));  // vali |= (s64)(frsqrte_expected_base[index] -
                                            // frsqrte_expected_dec[index] * (i % 2048)) << 26;
  MOVQ_xmm(XMM0, R(RSCRATCH2));
@@ -140,13 +147,22 @@ void CommonAsmRoutines::GenFres()
  AND(32, R(RSCRATCH), Imm32(0x3FF));  // i % 1024
  AND(32, R(RSCRATCH2), Imm8(0x1F));   // i / 1024

-  IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_dec)));
+  PUSH(RSCRATCH_EXTRA);
+  MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(MathUtil::fres_expected)));
+  static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
+
+  IMUL(32, RSCRATCH,
+       MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
  ADD(32, R(RSCRATCH), Imm8(1));
  SHR(32, R(RSCRATCH), Imm8(1));

-  MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_base)));
+  MOV(32, R(RSCRATCH2),
+      MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
  SUB(32, R(RSCRATCH2), R(RSCRATCH));
  SHL(64, R(RSCRATCH2), Imm8(29));
+
+  POP(RSCRATCH_EXTRA);
+
  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));  // vali |= (s64)(fres_expected_base[i / 1024] -
                                            // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
                                            // << 29