Merge pull request #5246 from MerryMage/math-util

Jit64AsmCommon: Make frsqrte and fres PIE-compliant
This commit is contained in:
Mat M 2017-04-11 16:11:19 -04:00 committed by GitHub
commit 8ecc5e9b7a
6 changed files with 64 additions and 47 deletions

View File

@@ -90,17 +90,16 @@ u32 ClassifyFloat(float fvalue)
} }
} }
const int frsqrte_expected_base[] = { const std::array<BaseAndDec, 32> frsqrte_expected = {{
0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000, {0x3ffa000, 0x7a4}, {0x3c29000, 0x700}, {0x38aa000, 0x670}, {0x3572000, 0x5f2},
0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000, {0x3279000, 0x584}, {0x2fb7000, 0x524}, {0x2d26000, 0x4cc}, {0x2ac0000, 0x47e},
0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000, {0x2881000, 0x43a}, {0x2665000, 0x3fa}, {0x2468000, 0x3c2}, {0x2287000, 0x38e},
0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800, {0x20c1000, 0x35e}, {0x1f12000, 0x332}, {0x1d79000, 0x30a}, {0x1bf4000, 0x2e6},
}; {0x1a7e800, 0x568}, {0x17cb800, 0x4f3}, {0x1552800, 0x48d}, {0x130c000, 0x435},
const int frsqrte_expected_dec[] = { {0x10f2000, 0x3e7}, {0x0eff000, 0x3a2}, {0x0d2e000, 0x365}, {0x0b7c000, 0x32e},
0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2, {0x09e5000, 0x2fc}, {0x0867000, 0x2d0}, {0x06ff000, 0x2a8}, {0x05ab800, 0x283},
0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2, {0x046a000, 0x261}, {0x0339800, 0x243}, {0x0218800, 0x226}, {0x0105800, 0x20b},
0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b, }};
};
double ApproximateReciprocalSquareRoot(double val) double ApproximateReciprocalSquareRoot(double val)
{ {
@@ -154,21 +153,20 @@ double ApproximateReciprocalSquareRoot(double val)
int i = (int)(mantissa >> 37); int i = (int)(mantissa >> 37);
vali = sign | exponent; vali = sign | exponent;
int index = i / 2048 + (odd_exponent ? 16 : 0); int index = i / 2048 + (odd_exponent ? 16 : 0);
vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; auto& entry = frsqrte_expected[index];
vali |= (s64)(entry.m_base - entry.m_dec * (i % 2048)) << 26;
return valf; return valf;
} }
const int fres_expected_base[] = { const std::array<BaseAndDec, 32> fres_expected = {{
0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800, {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00, {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800, {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00, {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
}; {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
const int fres_expected_dec[] = { {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245, {0x041c00, 0x108}, {0x020c00, 0x106},
0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e, }};
0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106,
};
// Used by fres and ps_res. // Used by fres and ps_res.
double ApproximateReciprocal(double val) double ApproximateReciprocal(double val)
@@ -213,9 +211,9 @@ double ApproximateReciprocal(double val)
exponent = (0x7FDLL << 52) - exponent; exponent = (0x7FDLL << 52) - exponent;
int i = (int)(mantissa >> 37); int i = (int)(mantissa >> 37);
auto& entry = fres_expected[i / 1024];
vali = sign | exponent; vali = sign | exponent;
vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) vali |= (s64)(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
<< 29;
return valf; return valf;
} }

View File

@@ -5,6 +5,7 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <array>
#include <cstdlib> #include <cstdlib>
#include <vector> #include <vector>
@@ -131,10 +132,13 @@ u32 ClassifyDouble(double dvalue);
// More efficient float version. // More efficient float version.
u32 ClassifyFloat(float fvalue); u32 ClassifyFloat(float fvalue);
extern const int frsqrte_expected_base[]; struct BaseAndDec
extern const int frsqrte_expected_dec[]; {
extern const int fres_expected_base[]; int m_base;
extern const int fres_expected_dec[]; int m_dec;
};
extern const std::array<BaseAndDec, 32> frsqrte_expected;
extern const std::array<BaseAndDec, 32> fres_expected;
// PowerPC approximation algorithms // PowerPC approximation algorithms
double ApproximateReciprocalSquareRoot(double val); double ApproximateReciprocalSquareRoot(double val);

View File

@@ -7,7 +7,6 @@
#include <utility> #include <utility>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64Common/ConstantPool.h" #include "Core/PowerPC/Jit64Common/ConstantPool.h"
ConstantPool::ConstantPool() = default; ConstantPool::ConstantPool() = default;
@@ -37,8 +36,8 @@ void ConstantPool::Shutdown()
m_const_info.clear(); m_const_info.clear();
} }
Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size, const void* ConstantPool::GetConstant(const void* value, size_t element_size, size_t num_elements,
size_t num_elements, size_t index) size_t index)
{ {
const size_t value_size = element_size * num_elements; const size_t value_size = element_size * num_elements;
auto iter = m_const_info.find(value); auto iter = m_const_info.find(value);
@@ -59,5 +58,5 @@ Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size
_assert_msg_(DYNA_REC, info.m_size == value_size, _assert_msg_(DYNA_REC, info.m_size == value_size,
"Constant has incorrect size in constant pool."); "Constant has incorrect size in constant pool.");
u8* location = static_cast<u8*>(info.m_location); u8* location = static_cast<u8*>(info.m_location);
return Gen::M(location + element_size * index); return location + element_size * index;
} }

View File

@@ -7,12 +7,6 @@
#include <cstddef> #include <cstddef>
#include <map> #include <map>
namespace Gen
{
struct OpArg;
class X64CodeBlock;
}
// Constants are copied into this pool so that they live at a memory location // Constants are copied into this pool so that they live at a memory location
// that is close to the code that references it. This ensures that the 32-bit // that is close to the code that references it. This ensures that the 32-bit
// limitation on RIP addressing is not an issue. // limitation on RIP addressing is not an issue.
@@ -32,7 +26,7 @@ public:
// Copies the value into the pool if it doesn't exist. Returns a pointer // Copies the value into the pool if it doesn't exist. Returns a pointer
// to existing values if they were already copied. Pointer equality is // to existing values if they were already copied. Pointer equality is
// used to determine if two constants are the same. // used to determine if two constants are the same.
Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements, const void* GetConstant(const void* value, size_t element_size, size_t num_elements,
size_t index); size_t index);
private: private:

View File

@@ -29,16 +29,22 @@ public:
void SwitchToFarCode(); void SwitchToFarCode();
void SwitchToNearCode(); void SwitchToNearCode();
template <typename T>
const void* GetConstantFromPool(const T& value)
{
return m_const_pool.GetConstant(&value, sizeof(T), 1, 0);
}
template <typename T> template <typename T>
Gen::OpArg MConst(const T& value) Gen::OpArg MConst(const T& value)
{ {
return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0); return Gen::M(GetConstantFromPool(value));
} }
template <typename T, size_t N> template <typename T, size_t N>
Gen::OpArg MConst(const T (&value)[N], size_t index = 0) Gen::OpArg MConst(const T (&value)[N], size_t index = 0)
{ {
return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index); return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index));
} }
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,

View File

@@ -69,13 +69,20 @@ void CommonAsmRoutines::GenFrsqrte()
AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0);
PUSH(RSCRATCH2);
MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(MathUtil::frsqrte_expected)));
static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
SHR(64, R(RSCRATCH), Imm8(37)); SHR(64, R(RSCRATCH), Imm8(37));
AND(32, R(RSCRATCH), Imm32(0x7FF)); AND(32, R(RSCRATCH), Imm32(0x7FF));
IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_dec))); IMUL(32, RSCRATCH,
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
MOV(32, R(RSCRATCH_EXTRA), MOV(32, R(RSCRATCH_EXTRA),
MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_base))); MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
POP(RSCRATCH2);
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] -
// frsqrte_expected_dec[index] * (i % 2048)) << 26; // frsqrte_expected_dec[index] * (i % 2048)) << 26;
MOVQ_xmm(XMM0, R(RSCRATCH2)); MOVQ_xmm(XMM0, R(RSCRATCH2));
@@ -140,13 +147,22 @@ void CommonAsmRoutines::GenFres()
AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_dec))); PUSH(RSCRATCH_EXTRA);
MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(MathUtil::fres_expected)));
static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
IMUL(32, RSCRATCH,
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
ADD(32, R(RSCRATCH), Imm8(1)); ADD(32, R(RSCRATCH), Imm8(1));
SHR(32, R(RSCRATCH), Imm8(1)); SHR(32, R(RSCRATCH), Imm8(1));
MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_base))); MOV(32, R(RSCRATCH2),
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
SUB(32, R(RSCRATCH2), R(RSCRATCH)); SUB(32, R(RSCRATCH2), R(RSCRATCH));
SHL(64, R(RSCRATCH2), Imm8(29)); SHL(64, R(RSCRATCH2), Imm8(29));
POP(RSCRATCH_EXTRA);
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] -
// (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
// << 29 // << 29