diff --git a/Source/Core/Common/MathUtil.cpp b/Source/Core/Common/MathUtil.cpp index bdb45e89f9..75c7f0278a 100644 --- a/Source/Core/Common/MathUtil.cpp +++ b/Source/Core/Common/MathUtil.cpp @@ -90,17 +90,16 @@ u32 ClassifyFloat(float fvalue) } } -const int frsqrte_expected_base[] = { - 0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000, - 0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000, - 0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000, - 0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800, -}; -const int frsqrte_expected_dec[] = { - 0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2, - 0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2, - 0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b, -}; +const std::array frsqrte_expected = {{ + {0x3ffa000, 0x7a4}, {0x3c29000, 0x700}, {0x38aa000, 0x670}, {0x3572000, 0x5f2}, + {0x3279000, 0x584}, {0x2fb7000, 0x524}, {0x2d26000, 0x4cc}, {0x2ac0000, 0x47e}, + {0x2881000, 0x43a}, {0x2665000, 0x3fa}, {0x2468000, 0x3c2}, {0x2287000, 0x38e}, + {0x20c1000, 0x35e}, {0x1f12000, 0x332}, {0x1d79000, 0x30a}, {0x1bf4000, 0x2e6}, + {0x1a7e800, 0x568}, {0x17cb800, 0x4f3}, {0x1552800, 0x48d}, {0x130c000, 0x435}, + {0x10f2000, 0x3e7}, {0x0eff000, 0x3a2}, {0x0d2e000, 0x365}, {0x0b7c000, 0x32e}, + {0x09e5000, 0x2fc}, {0x0867000, 0x2d0}, {0x06ff000, 0x2a8}, {0x05ab800, 0x283}, + {0x046a000, 0x261}, {0x0339800, 0x243}, {0x0218800, 0x226}, {0x0105800, 0x20b}, +}}; double ApproximateReciprocalSquareRoot(double val) { @@ -154,21 +153,20 @@ double ApproximateReciprocalSquareRoot(double val) int i = (int)(mantissa >> 37); vali = sign | exponent; int index = i / 2048 + (odd_exponent ? 16 : 0); - vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; + auto& entry = frsqrte_expected[index]; + vali |= (s64)(entry.m_base - entry.m_dec * (i % 2048)) << 26; return valf; } -const int fres_expected_base[] = { - 0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800, - 0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00, - 0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800, - 0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00, -}; -const int fres_expected_dec[] = { - 0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245, - 0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e, - 0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106, -}; +const std::array fres_expected = {{ + {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313}, + {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261}, + {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5}, + {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b}, + {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143}, + {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a}, + {0x041c00, 0x108}, {0x020c00, 0x106}, +}}; // Used by fres and ps_res. double ApproximateReciprocal(double val) @@ -213,9 +211,9 @@ double ApproximateReciprocal(double val) exponent = (0x7FDLL << 52) - exponent; int i = (int)(mantissa >> 37); + auto& entry = fres_expected[i / 1024]; vali = sign | exponent; - vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) - << 29; + vali |= (s64)(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29; return valf; } diff --git a/Source/Core/Common/MathUtil.h b/Source/Core/Common/MathUtil.h index e91358c5c1..7e48e38d80 100644 --- a/Source/Core/Common/MathUtil.h +++ b/Source/Core/Common/MathUtil.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -131,10 +132,13 @@ u32 ClassifyDouble(double dvalue); // More efficient float version. u32 ClassifyFloat(float fvalue); -extern const int frsqrte_expected_base[]; -extern const int frsqrte_expected_dec[]; -extern const int fres_expected_base[]; -extern const int fres_expected_dec[]; +struct BaseAndDec +{ + int m_base; + int m_dec; +}; +extern const std::array frsqrte_expected; +extern const std::array fres_expected; // PowerPC approximation algorithms double ApproximateReciprocalSquareRoot(double val); diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp index cd8da238a0..24db1f12c4 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp @@ -7,7 +7,6 @@ #include #include "Common/Assert.h" -#include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64Common/ConstantPool.h" ConstantPool::ConstantPool() = default; @@ -37,8 +36,8 @@ void ConstantPool::Shutdown() m_const_info.clear(); } -Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size, - size_t num_elements, size_t index) +const void* ConstantPool::GetConstant(const void* value, size_t element_size, size_t num_elements, + size_t index) { const size_t value_size = element_size * num_elements; auto iter = m_const_info.find(value); @@ -59,5 +58,5 @@ Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size _assert_msg_(DYNA_REC, info.m_size == value_size, "Constant has incorrect size in constant pool."); u8* location = static_cast(info.m_location); - return Gen::M(location + element_size * index); + return location + element_size * index; } diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h index 732a41af9d..51e6e84d7d 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h @@ -7,12 +7,6 @@ #include #include -namespace Gen -{ -struct OpArg; -class X64CodeBlock; -} - // Constants are copied into this pool so that they live at a memory location // that is close to the code that references it. This ensures that the 32-bit // limitation on RIP addressing is not an issue. @@ -32,8 +26,8 @@ public: // Copies the value into the pool if it doesn't exist. Returns a pointer // to existing values if they were already copied. Pointer equality is // used to determine if two constants are the same. - Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements, - size_t index); + const void* GetConstant(const void* value, size_t element_size, size_t num_elements, + size_t index); private: struct ConstantInfo diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 244494a31e..20e44d0bc0 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -29,16 +29,22 @@ public: void SwitchToFarCode(); void SwitchToNearCode(); + template + const void* GetConstantFromPool(const T& value) + { + return m_const_pool.GetConstant(&value, sizeof(T), 1, 0); + } + template Gen::OpArg MConst(const T& value) { - return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0); + return Gen::M(GetConstantFromPool(value)); } template Gen::OpArg MConst(const T (&value)[N], size_t index = 0) { - return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index); + return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index)); } Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 876d24c73e..dce31b4d4b 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -69,13 +69,20 @@ void CommonAsmRoutines::GenFrsqrte() AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); + PUSH(RSCRATCH2); + MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(MathUtil::frsqrte_expected))); + static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size"); + SHR(64, R(RSCRATCH), Imm8(37)); AND(32, R(RSCRATCH), Imm32(0x7FF)); - IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_dec))); + IMUL(32, RSCRATCH, + MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec))); MOV(32, R(RSCRATCH_EXTRA), - MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_base))); + MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base))); SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); + + POP(RSCRATCH2); OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - // frsqrte_expected_dec[index] * (i % 2048)) << 26; MOVQ_xmm(XMM0, R(RSCRATCH2)); @@ -140,13 +147,22 @@ void CommonAsmRoutines::GenFres() AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 - IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_dec))); + PUSH(RSCRATCH_EXTRA); + MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(MathUtil::fres_expected))); + static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size"); + + IMUL(32, RSCRATCH, + MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec))); ADD(32, R(RSCRATCH), Imm8(1)); SHR(32, R(RSCRATCH), Imm8(1)); - MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_base))); + MOV(32, R(RSCRATCH2), + MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base))); SUB(32, R(RSCRATCH2), R(RSCRATCH)); SHL(64, R(RSCRATCH2), Imm8(29)); + + POP(RSCRATCH_EXTRA); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) // << 29