From b0d6c29073ca1996c3e130d5b3de922cb9ac671f Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:30:41 +0000 Subject: [PATCH 1/9] JitAsmCommon: Add missing sizes to constant arrays This allows generic code to determine the size of these arrays. --- Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp | 6 +++--- Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 6378fe2860..783e15aa50 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -7,7 +7,7 @@ alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; -alignas(16) const float m_quantizeTableS[] = { +alignas(16) const float m_quantizeTableS[128] = { (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3), (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), @@ -42,7 +42,7 @@ alignas(16) const float m_quantizeTableS[] = { 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), }; -alignas(16) const float m_dequantizeTableS[] = { +alignas(16) const float m_dequantizeTableS[128] = { 1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3), 1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5), @@ -77,4 +77,4 @@ alignas(16) const float m_dequantizeTableS[] = { (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1), }; -alignas(16) const float m_one[] = {1.0f, 0.0f, 0.0f, 0.0f}; +alignas(16) const float m_one[4] = {1.0f, 0.0f, 0.0f, 0.0f}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index f8d997ca57..73c920cf18 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -8,9 +8,9 @@ alignas(16) extern const u8 pbswapShuffle1x4[16]; alignas(16) extern const u8 pbswapShuffle2x4[16]; -alignas(16) extern const float m_one[]; -alignas(16) extern const float m_quantizeTableS[]; -alignas(16) extern const float m_dequantizeTableS[]; +alignas(16) extern const float m_one[4]; +alignas(16) extern const float m_quantizeTableS[128]; +alignas(16) extern const float m_dequantizeTableS[128]; class CommonAsmRoutinesBase { From 9951961338349aba00983668419b72a688e1df5e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:33:22 +0000 Subject: [PATCH 2/9] ConstantPool: Implement a constant pool Constants are copied into this pool so that they live at a memory location that is close to the code that references it. The pool allocates memory from a provided X64CodeBlock to use. 
The purpose of the pool is to overcome the 32-bit offset limitation that RIP-relative addressing has.` --- Source/Core/Core/CMakeLists.txt | 1 + Source/Core/Core/Core.vcxproj | 2 + Source/Core/Core/Core.vcxproj.filters | 6 ++ .../Core/PowerPC/Jit64Common/ConstantPool.cpp | 67 +++++++++++++++++++ .../Core/PowerPC/Jit64Common/ConstantPool.h | 52 ++++++++++++++ 5 files changed, 128 insertions(+) create mode 100644 Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp create mode 100644 Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index c9704450ab..13d55216cc 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -244,6 +244,7 @@ if(_M_X86) PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp PowerPC/Jit64Common/BlockCache.cpp + PowerPC/Jit64Common/ConstantPool.cpp PowerPC/Jit64Common/EmuCodeBlock.cpp PowerPC/Jit64Common/FarCodeCache.cpp PowerPC/Jit64Common/Jit64AsmCommon.cpp diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index 86edd95d87..049aba4703 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -243,6 +243,7 @@ + @@ -484,6 +485,7 @@ + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 18bce72497..e108623f61 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -867,6 +867,9 @@ IOS\USB\Bluetooth + + PowerPC\Jit64Common + @@ -1487,6 +1490,9 @@ IOS + + PowerPC\Jit64Common + diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp new file mode 100644 index 0000000000..20c3f46802 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.cpp @@ -0,0 +1,67 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64Common/ConstantPool.h" + +ConstantPool::ConstantPool(Gen::X64CodeBlock* parent) : m_parent(parent) +{ +} + +ConstantPool::~ConstantPool() = default; + +void ConstantPool::AllocCodeSpace() +{ + _assert_(!m_current_ptr); + Init(); +} + +void ConstantPool::ClearCodeSpace() +{ + Init(); +} + +Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size, + size_t num_elements, size_t index) +{ + const size_t value_size = element_size * num_elements; + auto iter = m_const_info.find(value); + + if (iter == m_const_info.end()) + { + void* ptr = std::align(ALIGNMENT, value_size, m_current_ptr, m_remaining_size); + _assert_msg_(DYNA_REC, ptr, "Constant pool has run out of space."); + + m_current_ptr = static_cast(m_current_ptr) + value_size; + m_remaining_size -= value_size; + + std::memcpy(ptr, value, value_size); + iter = m_const_info.emplace(std::make_pair(value, ConstantInfo{ptr, value_size})).first; + } + + const ConstantInfo& info = iter->second; + _assert_msg_(DYNA_REC, info.m_size == value_size, + "Constant has incorrect size in constant pool."); + u8* location = static_cast(info.m_location); + return Gen::M(location + element_size * index); +} + +void ConstantPool::Init() +{ + // If execution happens to run to the start of the constant pool, halt. + m_parent->INT3(); + m_parent->AlignCode16(); + + // Reserve a block of memory CONST_POOL_SIZE in size. 
+ m_current_ptr = m_parent->GetWritableCodePtr(); + m_parent->ReserveCodeSpace(CONST_POOL_SIZE); + + m_remaining_size = CONST_POOL_SIZE; + m_const_info.clear(); +} diff --git a/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h new file mode 100644 index 0000000000..eba5cbec0f --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/ConstantPool.h @@ -0,0 +1,52 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +namespace Gen +{ +struct OpArg; +class X64CodeBlock; +} + +// Constants are copied into this pool so that they live at a memory location +// that is close to the code that references it. This ensures that the 32-bit +// limitation on RIP addressing is not an issue. +class ConstantPool +{ +public: + static constexpr size_t CONST_POOL_SIZE = 1024 * 32; + static constexpr size_t ALIGNMENT = 16; + + explicit ConstantPool(Gen::X64CodeBlock* parent); + ~ConstantPool(); + + // ConstantPool reserves CONST_POOL_SIZE bytes from parent, and uses + // that space to store its constants. + void AllocCodeSpace(); + void ClearCodeSpace(); + + // Copies the value into the pool if it doesn't exist. Returns a pointer + // to existing values if they were already copied. Pointer equality is + // used to determine if two constants are the same. + Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements, + size_t index); + +private: + void Init(); + + struct ConstantInfo + { + void* m_location; + size_t m_size; + }; + + Gen::X64CodeBlock* m_parent; + void* m_current_ptr = nullptr; + size_t m_remaining_size = CONST_POOL_SIZE; + std::map m_const_info; +}; From ff441efc268bb0b042b822d74d73424dedd80625 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:37:25 +0000 Subject: [PATCH 3/9] EmuCodeBlock: Use ConstantPool --- Source/Core/Common/CodeBlock.h | 4 ++-- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 12 ++++++++++++ .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Source/Core/Common/CodeBlock.h b/Source/Core/Common/CodeBlock.h index 7bd554b9aa..28a4919e66 100644 --- a/Source/Core/Common/CodeBlock.h +++ b/Source/Core/Common/CodeBlock.h @@ -42,7 +42,7 @@ public: } // Call this before you generate any code. - void AllocCodeSpace(size_t size, bool need_low = true) + virtual void AllocCodeSpace(size_t size, bool need_low = true) { region_size = size; region = static_cast(Common::AllocateExecutableMemory(region_size, need_low)); @@ -51,7 +51,7 @@ public: // Always clear code space with breakpoints, so that if someone accidentally executes // uninitialized, it just breaks into the debugger. 
- void ClearCodeSpace() + virtual void ClearCodeSpace() { PoisonMemory(); ResetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 353aaed52d..14e06d7b6c 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -40,6 +40,18 @@ OpArg FixImmediate(int access_size, OpArg arg) } } // Anonymous namespace +void EmuCodeBlock::ClearCodeSpace() +{ + X64CodeBlock::ClearCodeSpace(); + m_const_pool.ClearCodeSpace(); +} + +void EmuCodeBlock::AllocCodeSpace(size_t size, bool need_low) +{ + X64CodeBlock::AllocCodeSpace(size + ConstantPool::CONST_POOL_SIZE, need_low); + m_const_pool.AllocCodeSpace(); +} + void EmuCodeBlock::MemoryExceptionCheck() { // TODO: We really should untangle the trampolines, exception handlers and diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 69e6359947..08bafb49d7 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -10,6 +10,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64Common/ConstantPool.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include "Core/PowerPC/Jit64Common/TrampolineInfo.h" @@ -22,12 +23,27 @@ class Mapping; class EmuCodeBlock : public Gen::X64CodeBlock { public: + void ClearCodeSpace() override; + void AllocCodeSpace(size_t size, bool need_low = true) override; + void MemoryExceptionCheck(); // Simple functions to switch between near and far code emitting void SwitchToFarCode(); void SwitchToNearCode(); + template + Gen::OpArg MConst(const T& value) + { + return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0); + } + + template + Gen::OpArg MConst(const T (&value)[N], size_t index = 0) + { + return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index); + } + Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, BitSet32 registers_in_use); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, @@ -105,6 +121,7 @@ public: void Clear(); protected: + ConstantPool m_const_pool{this}; FarCodeCache m_far_code; u8* m_near_code; // Backed up when we switch to far code. 
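
A minimal usage sketch (illustrative, not part of the series): the two MConst overloads added above wrap ConstantPool::GetConstantOpArg, so emitter code can reference a scalar constant, a whole array, or a single array element through the pool instead of raw M(). The table names and the emitter method below are hypothetical.

    // Hypothetical code in a class derived from EmuCodeBlock.
    alignas(16) static const u64 psSignBits[2] = {0x8000000000000000ULL, 0};
    alignas(16) static const float scale_table[4] = {1.0f, 2.0f, 4.0f, 8.0f};

    void SomeEmitter::Example()
    {
      // Array overload: the whole 16-byte constant is copied into the pool on first use.
      XORPD(XMM0, MConst(psSignBits));
      // Indexed form: the returned OpArg points at scale_table[2] inside the pooled copy.
      MULSS(XMM1, MConst(scale_table, 2));
    }

Because the pool lives inside the same code block as the emitted instructions, both references stay within RIP-relative range regardless of where the executable region was allocated.
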
From 0fe234ec9baa66c7c3e992d3f5f1feb6307b0440 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:39:10 +0000 Subject: [PATCH 4/9] Jit_FloatingPoint: Use MConst for constants --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 63e78136ba..a331569470 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -108,7 +108,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re UCOMISD(xmm, R(xmm)); fixups.push_back(J_CC(CC_P)); } - MOVDDUP(xmm, M(psGeneratedQNaN)); + MOVDDUP(xmm, MConst(psGeneratedQNaN)); for (FixupBranch fixup : fixups) SetJumpTarget(fixup); FixupBranch done = J(true); @@ -127,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re SwitchToFarCode(); SetJumpTarget(handle_nan); _assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0"); - BLENDVPD(xmm, M(psGeneratedQNaN)); + BLENDVPD(xmm, MConst(psGeneratedQNaN)); for (u32 x : inputs) { avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD); @@ -151,7 +151,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re SetJumpTarget(handle_nan); MOVAPD(tmp, R(clobber)); ANDNPD(clobber, R(xmm)); - ANDPD(tmp, M(psGeneratedQNaN)); + ANDPD(tmp, MConst(psGeneratedQNaN)); ORPD(tmp, R(clobber)); MOVAPD(xmm, R(tmp)); for (u32 x : inputs) @@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) ADDSD(XMM1, fpr.R(b)); } if (inst.SUBOP5 == 31) // nmadd - XORPD(XMM1, M(packed ? psSignBits2 : psSignBits)); + XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits)); } fpr.BindToRegister(d, !single); if (single) @@ -385,15 +385,15 @@ void Jit64::fsign(UGeckoInstruction inst) { case 40: // neg avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src, - M(packed ? psSignBits2 : psSignBits), packed); + MConst(packed ? psSignBits2 : psSignBits), packed); break; case 136: // nabs - avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), - packed); + avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, + MConst(packed ? psSignBits2 : psSignBits), packed); break; case 264: // abs - avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), - packed); + avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, + MConst(packed ? psAbsMask2 : psAbsMask), packed); break; default: PanicAlert("fsign bleh"); @@ -608,7 +608,7 @@ void Jit64::fctiwx(UGeckoInstruction inst) // The upper 32 bits of the result are set to 0xfff80000, // except for -0.0 where they are set to 0xfff80001 (TODO). 
- MOVAPD(XMM0, M(half_qnan_and_s32_max)); + MOVAPD(XMM0, MConst(half_qnan_and_s32_max)); MINSD(XMM0, fpr.R(b)); switch (inst.SUBOP10) { From 8b93baefb7f111d9fc232282230bc7fca83b304c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:39:47 +0000 Subject: [PATCH 5/9] Jit_SystemRegisters: Use MConst for constants --- Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 2127b1f62d..a9ae18806a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -623,7 +623,7 @@ void Jit64::mcrfs(UGeckoInstruction inst) } AND(32, R(RSCRATCH), Imm32(mask)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); - LEA(64, RSCRATCH, M(m_crTable.data())); + LEA(64, RSCRATCH, MConst(m_crTable)); MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); } @@ -664,7 +664,7 @@ static const u32 s_fpscr_to_mxcsr[] = { // Needs value of FPSCR in RSCRATCH. void Jit64::UpdateMXCSR() { - LEA(64, RSCRATCH2, M(&s_fpscr_to_mxcsr)); + LEA(64, RSCRATCH2, MConst(s_fpscr_to_mxcsr)); AND(32, R(RSCRATCH), Imm32(7)); LDMXCSR(MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0)); } @@ -730,7 +730,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst) // Field 7 contains NI and RN. if (inst.CRFD == 7) - LDMXCSR(M(&s_fpscr_to_mxcsr[imm & 7])); + LDMXCSR(MConst(s_fpscr_to_mxcsr, imm & 7)); } void Jit64::mtfsfx(UGeckoInstruction inst) From 4814c4ac5a26115bb6aac7f131098ab76978888a Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:40:10 +0000 Subject: [PATCH 6/9] EmuCodeBlock: Use MConst for constants --- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 14e06d7b6c..3a35c8dfa7 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -848,16 +848,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg // mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1); if (input.IsSimpleReg() && cpu_info.bAVX) { - VPAND(tmp, input.GetSimpleReg(), M(psRoundBit)); - VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate)); + VPAND(tmp, input.GetSimpleReg(), MConst(psRoundBit)); + VPAND(output, input.GetSimpleReg(), MConst(psMantissaTruncate)); PADDQ(output, R(tmp)); } else { if (!input.IsSimpleReg(output)) MOVAPD(output, input); - avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true); - PAND(output, M(psMantissaTruncate)); + avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), MConst(psRoundBit), true, true); + PAND(output, MConst(psMantissaTruncate)); PADDQ(output, R(tmp)); } } @@ -902,7 +902,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) MOVSD(XMM1, R(src)); // Grab Exponent - PAND(XMM1, M(&double_exponent)); + PAND(XMM1, MConst(double_exponent)); PSRLQ(XMM1, 52); MOVD_xmm(R(RSCRATCH), XMM1); @@ -921,15 +921,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // xmm1 = fraction | 0x0010000000000000 MOVSD(XMM1, R(src)); - PAND(XMM1, M(&double_fraction)); - POR(XMM1, M(&double_explicit_top_bit)); + PAND(XMM1, MConst(double_fraction)); + 
POR(XMM1, MConst(double_explicit_top_bit)); // fraction >> shift PSRLQ(XMM1, R(XMM0)); // OR the sign bit in. MOVSD(XMM0, R(src)); - PAND(XMM0, M(&double_sign_bit)); + PAND(XMM0, MConst(double_sign_bit)); PSRLQ(XMM0, 32); POR(XMM1, R(XMM0)); @@ -942,12 +942,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // We want bits 0, 1 MOVSD(XMM1, R(src)); - PAND(XMM1, M(&double_top_two_bits)); + PAND(XMM1, MConst(double_top_two_bits)); PSRLQ(XMM1, 32); // And 5 through to 34 MOVSD(XMM0, R(src)); - PAND(XMM0, M(&double_bottom_bits)); + PAND(XMM0, MConst(double_bottom_bits)); PSRLQ(XMM0, 29); // OR them togther @@ -979,8 +979,8 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Here, check to see if the source is small enough that it will result in a denormal, and pass it // to the x87 unit // if it is. - avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), M(&double_sign_bit), true, true); - UCOMISD(XMM0, M(&min_norm_single)); + avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), MConst(double_sign_bit), true, true); + UCOMISD(XMM0, MConst(min_norm_single)); FixupBranch nanConversion = J_CC(CC_P, true); FixupBranch denormalConversion = J_CC(CC_B, true); CVTSD2SS(dst, R(src)); @@ -994,7 +994,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) FixupBranch continue1 = J_CC(CC_C, true); // Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by // conversion. - ANDPS(dst, M(&single_qnan_bit)); + ANDPS(dst, MConst(single_qnan_bit)); FixupBranch continue2 = J(true); SetJumpTarget(denormalConversion); @@ -1037,7 +1037,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr SetJumpTarget(nanConversion); TEST(32, R(gprsrc), Imm32(0x00400000)); FixupBranch continue1 = J_CC(CC_NZ, true); - ANDPD(dst, M(&double_qnan_bit)); + ANDPD(dst, MConst(double_qnan_bit)); FixupBranch continue2 = J(true); SwitchToNearCode(); @@ -1069,7 +1069,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) { MOVQ_xmm(R(RSCRATCH), xmm); SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. 
- PTEST(xmm, M(psDoubleExp)); + PTEST(xmm, MConst(psDoubleExp)); FixupBranch maxExponent = J_CC(CC_C); FixupBranch zeroExponent = J_CC(CC_Z); @@ -1079,7 +1079,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) continue1 = J(); SetJumpTarget(maxExponent); - PTEST(xmm, M(psDoubleFrac)); + PTEST(xmm, MConst(psDoubleFrac)); FixupBranch notNAN = J_CC(CC_Z); // Max exponent + mantissa: PPC_FPCLASS_QNAN @@ -1109,10 +1109,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) else { MOVQ_xmm(R(RSCRATCH), xmm); - TEST(64, R(RSCRATCH), M(psDoubleExp)); + TEST(64, R(RSCRATCH), MConst(psDoubleExp)); FixupBranch zeroExponent = J_CC(CC_Z); - AND(64, R(RSCRATCH), M(psDoubleNoSign)); - CMP(64, R(RSCRATCH), M(psDoubleExp)); + AND(64, R(RSCRATCH), MConst(psDoubleNoSign)); + CMP(64, R(RSCRATCH), MConst(psDoubleExp)); FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative FixupBranch infinity = J_CC(CC_E); From 3dccc369d3f377f7a04ea57b8e5d1ca2ac6fc75d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 12:40:46 +0000 Subject: [PATCH 7/9] Jit64AsmCommon: Use MConst for constants --- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index a98d69551d..876d24c73e 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -180,8 +180,6 @@ void CommonAsmRoutines::GenMfcr() X64Reg tmp = RSCRATCH2; X64Reg cr_val = RSCRATCH_EXTRA; XOR(32, R(dst), R(dst)); - // we only need to zero the high bits of tmp once - XOR(32, R(tmp), R(tmp)); for (int i = 0; i < 8; i++) { static const u32 m_flagTable[8] = {0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9}; @@ -190,9 +188,13 @@ void CommonAsmRoutines::GenMfcr() MOV(64, R(cr_val), PPCSTATE(cr_val[i])); + // Upper bits of tmp need to be zeroed. + // Note: tmp is used later for address calculations and thus + // can't be zero-ed once. This also prevents partial + // register stalls due to SETcc. + XOR(32, R(tmp), R(tmp)); // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); - // FIXME: is there a better way to do this without the partial register merging? 
SETcc(CC_Z, R(tmp)); LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); @@ -204,7 +206,8 @@ void CommonAsmRoutines::GenMfcr() // SO: Bit 61 set; set flag bit 0 // LT: Bit 62 set; set flag bit 3 SHR(64, R(cr_val), Imm8(61)); - OR(32, R(dst), MScaled(cr_val, SCALE_4, PtrOffset(m_flagTable))); + LEA(64, tmp, MConst(m_flagTable)); + OR(32, R(dst), MComplex(tmp, cr_val, SCALE_4, 0)); } RET(); @@ -297,11 +300,12 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); + LEA(64, RSCRATCH, MConst(m_quantizeTableS)); + MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH)); } else if (quantize > 0) { - MULSS(XMM0, M(&m_quantizeTableS[quantize * 2])); + MULSS(XMM0, MConst(m_quantizeTableS, quantize * 2)); } switch (type) @@ -309,20 +313,20 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, case QUANTIZE_U8: XORPS(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(&m_255)); + MINSS(XMM0, MConst(m_255)); break; case QUANTIZE_S8: - MAXSS(XMM0, M(&m_m128)); - MINSS(XMM0, M(&m_127)); + MAXSS(XMM0, MConst(m_m128)); + MINSS(XMM0, MConst(m_127)); break; case QUANTIZE_U16: XORPS(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(m_65535)); + MINSS(XMM0, MConst(m_65535)); break; case QUANTIZE_S16: - MAXSS(XMM0, M(&m_m32768)); - MINSS(XMM0, M(&m_32767)); + MAXSS(XMM0, MConst(m_m32768)); + MINSS(XMM0, MConst(m_32767)); break; default: break; @@ -335,12 +339,13 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); + LEA(64, RSCRATCH, MConst(m_quantizeTableS)); + MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH)); MULPS(XMM0, R(XMM1)); } else if (quantize > 0) { - MOVQ_xmm(XMM1, M(&m_quantizeTableS[quantize * 2])); + MOVQ_xmm(XMM1, MConst(m_quantizeTableS, quantize * 2)); MULPS(XMM0, R(XMM1)); } @@ -358,7 +363,7 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type, // is out of int32 range while it's OK for large negatives, it isn't for positives // I don't know whether the overflow actually happens in any games but it potentially can // cause problems, so we need some clamping - MINPS(XMM0, M(m_65535)); + MINPS(XMM0, MConst(m_65535)); CVTTPS2DQ(XMM0, R(XMM0)); switch (type) @@ -419,7 +424,7 @@ void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline) { if (cpu_info.bSSSE3) { - PSHUFB(XMM0, M(pbswapShuffle2x4)); + PSHUFB(XMM0, MConst(pbswapShuffle2x4)); MOVQ_xmm(R(RSCRATCH), XMM0); } else @@ -492,13 +497,14 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); + LEA(64, RSCRATCH, MConst(m_dequantizeTableS)); + MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH)); } else if (quantize > 0) { - MULSS(XMM0, M(&m_dequantizeTableS[quantize * 2])); + MULSS(XMM0, MConst(m_dequantizeTableS, quantize * 2)); } - UNPCKLPS(XMM0, M(m_one)); + UNPCKLPS(XMM0, MConst(m_one)); } else { @@ -564,12 +570,13 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, if (quantize == -1) { SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); + LEA(64, RSCRATCH, MConst(m_dequantizeTableS)); + MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH)); MULPS(XMM0, R(XMM1)); } else if (quantize > 
0) { - MOVQ_xmm(XMM1, M(&m_dequantizeTableS[quantize * 2])); + MOVQ_xmm(XMM1, MConst(m_dequantizeTableS, quantize * 2)); MULPS(XMM0, R(XMM1)); } } @@ -597,7 +604,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) else if (cpu_info.bSSSE3) { MOVD_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); - PSHUFB(XMM0, M(pbswapShuffle1x4)); + PSHUFB(XMM0, MConst(pbswapShuffle1x4)); } else { @@ -605,7 +612,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); } - UNPCKLPS(XMM0, M(m_one)); + UNPCKLPS(XMM0, MConst(m_one)); } else { @@ -623,7 +630,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) else if (cpu_info.bSSSE3) { MOVQ_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); - PSHUFB(XMM0, M(pbswapShuffle2x4)); + PSHUFB(XMM0, MConst(pbswapShuffle2x4)); } else { From 9058ccea3fabeac1d558ad5783f50dc724a3163b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 19 Mar 2017 13:49:41 +0000 Subject: [PATCH 8/9] IR_X86: Use MConst for constants --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 97bb83fa9e..b24d16c95a 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -1753,7 +1753,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u32 ssSignBits[4] = {0x80000000}; - Jit->PXOR(reg, M(ssSignBits)); + Jit->PXOR(reg, Jit->MConst(ssSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; @@ -1765,7 +1765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL}; - Jit->PXOR(reg, M(sdSignBits)); + Jit->PXOR(reg, Jit->MConst(sdSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; @@ -1777,7 +1777,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregURegWithMov(RI, I); alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000}; - Jit->PXOR(reg, M(psSignBits)); + Jit->PXOR(reg, Jit->MConst(psSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; From 4491e9b8297a4f554601f7d3167af96e32dd7831 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 20 Mar 2017 19:40:22 +0000 Subject: [PATCH 9/9] Jit_SystemRegisters: Add missing sizes to constant arrays --- Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index a9ae18806a..f60b15488d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -657,7 +657,7 @@ void Jit64::mffsx(UGeckoInstruction inst) } // MXCSR = s_fpscr_to_mxcsr[FPSCR & 7] -static const u32 s_fpscr_to_mxcsr[] = { +static const u32 s_fpscr_to_mxcsr[8] = { 0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80, };
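
One behavioural detail worth keeping in mind when writing new users of this API (a sketch assuming an EmuCodeBlock-derived emitter; the variable names are illustrative): the pool is keyed on the constant's address, not its value, so repeated references to the same object share a single pool slot, while distinct objects with equal contents each get their own copy.

    alignas(16) static const float m_255[4] = {255.0f};

    // First use copies element_size * num_elements = 16 bytes into the pool.
    Gen::OpArg first = MConst(m_255);
    // Second use finds the same key in m_const_info and returns the same pool location.
    Gen::OpArg second = MConst(m_255);
    // A different array with identical contents would still receive its own pool entry;
    // the size assert in GetConstantOpArg only fires if the same address is requested
    // again with a different total size.
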