Merge pull request #5246 from MerryMage/math-util
Jit64AsmCommon: Make frsqrte and fres PIE-compliant
This commit is contained in:
commit
8ecc5e9b7a
|
@ -90,17 +90,16 @@ u32 ClassifyFloat(float fvalue)
|
|||
}
|
||||
}
|
||||
|
||||
// Base terms of the frsqrte estimate table (32 entries).
// ApproximateReciprocalSquareRoot selects an entry with
// index = i / 2048 + (odd_exponent ? 16 : 0) and subtracts
// frsqrte_expected_dec[index] * (i % 2048) from it before shifting the result left by 26.
const int frsqrte_expected_base[] = {
|
||||
0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000,
|
||||
0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000,
|
||||
0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000,
|
||||
0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800,
|
||||
};
|
||||
// Per-step decrements paired index-for-index with frsqrte_expected_base (32 entries).
// ApproximateReciprocalSquareRoot multiplies the selected entry by (i % 2048) and
// subtracts the product from the matching base term.
const int frsqrte_expected_dec[] = {
|
||||
0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2,
|
||||
0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2,
|
||||
0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b,
|
||||
};
|
||||
// frsqrte estimate table: 32 {m_base, m_dec} pairs.
// ApproximateReciprocalSquareRoot picks entry
// index = i / 2048 + (odd_exponent ? 16 : 0) and computes
// m_base - m_dec * (i % 2048), which it shifts left by 26 and ORs into the result.
// GenFrsqrte reads the same table from the constant pool using SCALE_8 addressing,
// so each element must stay 8 bytes (see the static_assert there).
const std::array<BaseAndDec, 32> frsqrte_expected = {{
|
||||
{0x3ffa000, 0x7a4}, {0x3c29000, 0x700}, {0x38aa000, 0x670}, {0x3572000, 0x5f2},
|
||||
{0x3279000, 0x584}, {0x2fb7000, 0x524}, {0x2d26000, 0x4cc}, {0x2ac0000, 0x47e},
|
||||
{0x2881000, 0x43a}, {0x2665000, 0x3fa}, {0x2468000, 0x3c2}, {0x2287000, 0x38e},
|
||||
{0x20c1000, 0x35e}, {0x1f12000, 0x332}, {0x1d79000, 0x30a}, {0x1bf4000, 0x2e6},
|
||||
{0x1a7e800, 0x568}, {0x17cb800, 0x4f3}, {0x1552800, 0x48d}, {0x130c000, 0x435},
|
||||
{0x10f2000, 0x3e7}, {0x0eff000, 0x3a2}, {0x0d2e000, 0x365}, {0x0b7c000, 0x32e},
|
||||
{0x09e5000, 0x2fc}, {0x0867000, 0x2d0}, {0x06ff000, 0x2a8}, {0x05ab800, 0x283},
|
||||
{0x046a000, 0x261}, {0x0339800, 0x243}, {0x0218800, 0x226}, {0x0105800, 0x20b},
|
||||
}};
|
||||
|
||||
double ApproximateReciprocalSquareRoot(double val)
|
||||
{
|
||||
|
@ -154,21 +153,20 @@ double ApproximateReciprocalSquareRoot(double val)
|
|||
int i = (int)(mantissa >> 37);
|
||||
vali = sign | exponent;
|
||||
int index = i / 2048 + (odd_exponent ? 16 : 0);
|
||||
vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
auto& entry = frsqrte_expected[index];
|
||||
vali |= (s64)(entry.m_base - entry.m_dec * (i % 2048)) << 26;
|
||||
return valf;
|
||||
}
|
||||
|
||||
// Base terms of the fres estimate table (32 entries).
// ApproximateReciprocal selects entry i / 1024 and subtracts
// (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2 from it before
// shifting the result left by 29.
const int fres_expected_base[] = {
|
||||
0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800,
|
||||
0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00,
|
||||
0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800,
|
||||
0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00,
|
||||
};
|
||||
// Per-step decrements paired index-for-index with fres_expected_base (32 entries).
// ApproximateReciprocal multiplies the selected entry by (i % 1024), adds 1,
// halves the sum, and subtracts it from the matching base term.
const int fres_expected_dec[] = {
|
||||
0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245,
|
||||
0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e,
|
||||
0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106,
|
||||
};
|
||||
// fres estimate table: 32 {m_base, m_dec} pairs.
// ApproximateReciprocal picks entry i / 1024 and computes
// m_base - (m_dec * (i % 1024) + 1) / 2, which it shifts left by 29 and ORs
// into the result. GenFres reads the same table from the constant pool using
// SCALE_8 addressing, so each element must stay 8 bytes (see the static_assert there).
const std::array<BaseAndDec, 32> fres_expected = {{
|
||||
{0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
|
||||
{0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
|
||||
{0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
|
||||
{0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
|
||||
{0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
|
||||
{0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
|
||||
{0x041c00, 0x108}, {0x020c00, 0x106},
|
||||
}};
|
||||
|
||||
// Used by fres and ps_res.
|
||||
double ApproximateReciprocal(double val)
|
||||
|
@ -213,9 +211,9 @@ double ApproximateReciprocal(double val)
|
|||
exponent = (0x7FDLL << 52) - exponent;
|
||||
|
||||
int i = (int)(mantissa >> 37);
|
||||
auto& entry = fres_expected[i / 1024];
|
||||
vali = sign | exponent;
|
||||
vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
|
||||
<< 29;
|
||||
vali |= (s64)(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
|
||||
return valf;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
|
@ -131,10 +132,13 @@ u32 ClassifyDouble(double dvalue);
|
|||
// More efficient float version.
|
||||
u32 ClassifyFloat(float fvalue);
|
||||
|
||||
extern const int frsqrte_expected_base[];
|
||||
extern const int frsqrte_expected_dec[];
|
||||
extern const int fres_expected_base[];
|
||||
extern const int fres_expected_dec[];
|
||||
// One row of the frsqrte_expected / fres_expected estimate tables.
// m_base is the starting term for a table segment; m_dec is the decrement that
// the approximation routines scale by the low bits of the mantissa index and
// subtract from m_base.
// The JIT routines address rows with SCALE_8 plus offsetof(m_base / m_dec) and
// static_assert that sizeof(BaseAndDec) == 8, so keep this as exactly two ints.
struct BaseAndDec
|
||||
{
|
||||
int m_base;
|
||||
int m_dec;
|
||||
};
|
||||
extern const std::array<BaseAndDec, 32> frsqrte_expected;
|
||||
extern const std::array<BaseAndDec, 32> fres_expected;
|
||||
|
||||
// PowerPC approximation algorithms
|
||||
double ApproximateReciprocalSquareRoot(double val);
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <utility>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64Common/ConstantPool.h"
|
||||
|
||||
ConstantPool::ConstantPool() = default;
|
||||
|
@ -37,8 +36,8 @@ void ConstantPool::Shutdown()
|
|||
m_const_info.clear();
|
||||
}
|
||||
|
||||
Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size,
|
||||
size_t num_elements, size_t index)
|
||||
const void* ConstantPool::GetConstant(const void* value, size_t element_size, size_t num_elements,
|
||||
size_t index)
|
||||
{
|
||||
const size_t value_size = element_size * num_elements;
|
||||
auto iter = m_const_info.find(value);
|
||||
|
@ -59,5 +58,5 @@ Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size
|
|||
_assert_msg_(DYNA_REC, info.m_size == value_size,
|
||||
"Constant has incorrect size in constant pool.");
|
||||
u8* location = static_cast<u8*>(info.m_location);
|
||||
return Gen::M(location + element_size * index);
|
||||
return location + element_size * index;
|
||||
}
|
||||
|
|
|
@ -7,12 +7,6 @@
|
|||
#include <cstddef>
|
||||
#include <map>
|
||||
|
||||
namespace Gen
|
||||
{
|
||||
struct OpArg;
|
||||
class X64CodeBlock;
|
||||
}
|
||||
|
||||
// Constants are copied into this pool so that they live at a memory location
|
||||
// that is close to the code that references it. This ensures that the 32-bit
|
||||
// limitation on RIP addressing is not an issue.
|
||||
|
@ -32,8 +26,8 @@ public:
|
|||
// Copies the value into the pool if it doesn't exist. Returns a pointer
|
||||
// to existing values if they were already copied. Pointer equality is
|
||||
// used to determine if two constants are the same.
|
||||
Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements,
|
||||
size_t index);
|
||||
const void* GetConstant(const void* value, size_t element_size, size_t num_elements,
|
||||
size_t index);
|
||||
|
||||
private:
|
||||
struct ConstantInfo
|
||||
|
|
|
@ -29,16 +29,22 @@ public:
|
|||
void SwitchToFarCode();
|
||||
void SwitchToNearCode();
|
||||
|
||||
// Returns the pointer that m_const_pool.GetConstant yields for `value`,
// passing the address of `value`, sizeof(T), an element count of 1 and index 0.
// The pool is keyed on the address of `value` (pointer equality), so pass the
// same object each time to get the same pooled copy.
template <typename T>
|
||||
const void* GetConstantFromPool(const T& value)
|
||||
{
|
||||
return m_const_pool.GetConstant(&value, sizeof(T), 1, 0);
|
||||
}
|
||||
|
||||
// Returns a Gen::OpArg memory operand that refers to the constant pool's copy
// of `value`.
// NOTE(review): the two consecutive return lines below look like the removed
// and the added side of this diff hunk (old: GetConstantOpArg, new:
// Gen::M(GetConstantFromPool(value))), not two live statements — confirm
// against the real header.
template <typename T>
|
||||
Gen::OpArg MConst(const T& value)
|
||||
{
|
||||
return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0);
|
||||
return Gen::M(GetConstantFromPool(value));
|
||||
}
|
||||
|
||||
// Array overload: returns a Gen::OpArg memory operand that refers to element
// `index` (default 0) of the constant pool's copy of the N-element array `value`.
// NOTE(review): the two consecutive return lines below look like the removed
// and the added side of this diff hunk (old: GetConstantOpArg, new:
// Gen::M(m_const_pool.GetConstant(...))), not two live statements — confirm
// against the real header.
template <typename T, size_t N>
|
||||
Gen::OpArg MConst(const T (&value)[N], size_t index = 0)
|
||||
{
|
||||
return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index);
|
||||
return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index));
|
||||
}
|
||||
|
||||
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
|
||||
|
|
|
@ -69,13 +69,20 @@ void CommonAsmRoutines::GenFrsqrte()
|
|||
AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
|
||||
XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0);
|
||||
|
||||
PUSH(RSCRATCH2);
|
||||
MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(MathUtil::frsqrte_expected)));
|
||||
static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
|
||||
SHR(64, R(RSCRATCH), Imm8(37));
|
||||
AND(32, R(RSCRATCH), Imm32(0x7FF));
|
||||
IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_dec)));
|
||||
IMUL(32, RSCRATCH,
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
|
||||
MOV(32, R(RSCRATCH_EXTRA),
|
||||
MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_base)));
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
|
||||
SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
|
||||
|
||||
POP(RSCRATCH2);
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] -
|
||||
// frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
|
@ -140,13 +147,22 @@ void CommonAsmRoutines::GenFres()
|
|||
AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
|
||||
AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
|
||||
|
||||
IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_dec)));
|
||||
PUSH(RSCRATCH_EXTRA);
|
||||
MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(MathUtil::fres_expected)));
|
||||
static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
|
||||
IMUL(32, RSCRATCH,
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec)));
|
||||
ADD(32, R(RSCRATCH), Imm8(1));
|
||||
SHR(32, R(RSCRATCH), Imm8(1));
|
||||
|
||||
MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_base)));
|
||||
MOV(32, R(RSCRATCH2),
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base)));
|
||||
SUB(32, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH2), Imm8(29));
|
||||
|
||||
POP(RSCRATCH_EXTRA);
|
||||
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] -
|
||||
// (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
|
||||
// << 29
|
||||
|
|
Loading…
Reference in New Issue