diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 0d76fb8c47..a1bb58f238 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -196,6 +196,8 @@ if(_M_X86) PowerPC/Jit64IL/IR_X86.cpp PowerPC/Jit64IL/JitIL.cpp PowerPC/Jit64IL/JitIL_Tables.cpp + PowerPC/Jit64/FPURegCache.cpp + PowerPC/Jit64/GPRRegCache.cpp PowerPC/Jit64/Jit64_Tables.cpp PowerPC/Jit64/JitAsm.cpp PowerPC/Jit64/Jit_Branch.cpp diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index 47845d7b3d..13492f654f 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -225,6 +225,8 @@ + + @@ -427,6 +429,8 @@ + + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index ea1040901e..00023e98f4 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -666,6 +666,12 @@ PowerPC\JitIL + + PowerPC\Jit64 + + + PowerPC\Jit64 + PowerPC\Jit64 @@ -1262,6 +1268,12 @@ PowerPC\JitIL + + PowerPC\Jit64 + + + PowerPC\Jit64 + PowerPC\Jit64 diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp new file mode 100644 index 0000000000..73689377e1 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Core/PowerPC/Jit64/FPURegCache.h" + +#include "Core/PowerPC/Jit64/Jit.h" +#include "Core/PowerPC/Jit64Common/Jit64Base.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" + +using namespace Gen; + +FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit} +{ +} + +void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc) +{ + m_emitter->MOVAPD(new_loc, m_regs[preg].location.GetSimpleReg()); +} + +void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc) +{ + m_emitter->MOVAPD(new_loc, m_regs[preg].location); +} + +const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) +{ + static const X64Reg allocation_order[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, + XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; + *count = sizeof(allocation_order) / sizeof(X64Reg); + return allocation_order; +} + +OpArg FPURegCache::GetDefaultLocation(size_t reg) const +{ + return PPCSTATE(ps[reg][0]); +} + +BitSet32 FPURegCache::GetRegUtilization() +{ + return m_jit.js.op->gprInReg; +} + +BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead) +{ + BitSet32 regs_used; + + for (u32 i = 1; i < lookahead; i++) + { + BitSet32 regs_in = m_jit.js.op[i].fregsIn; + regs_used |= regs_in; + if (regs_in[preg]) + return regs_used; + } + + return regs_used; +} diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h new file mode 100644 index 0000000000..5e7fb5e5b5 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h @@ -0,0 +1,22 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "Core/PowerPC/Jit64/JitRegCache.h" + +class Jit64; + +class FPURegCache final : public RegCache +{ +public: + explicit FPURegCache(Jit64& jit); + + void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override; + void LoadRegister(size_t preg, Gen::X64Reg newLoc) override; + const Gen::X64Reg* GetAllocationOrder(size_t* count) override; + Gen::OpArg GetDefaultLocation(size_t reg) const override; + BitSet32 GetRegUtilization() override; + BitSet32 CountRegsIn(size_t preg, u32 lookahead) override; +}; diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp new file mode 100644 index 0000000000..e94f585526 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp @@ -0,0 +1,76 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Core/PowerPC/Jit64/GPRRegCache.h" + +#include "Core/PowerPC/Jit64/Jit.h" +#include "Core/PowerPC/Jit64Common/Jit64Base.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" + +using namespace Gen; + +GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit} +{ +} + +void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc) +{ + m_emitter->MOV(32, new_loc, m_regs[preg].location); +} + +void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc) +{ + m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].location); +} + +OpArg GPRRegCache::GetDefaultLocation(size_t reg) const +{ + return PPCSTATE(gpr[reg]); +} + +const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count) +{ + static const X64Reg allocation_order[] = { +// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into +// this. +#ifdef _WIN32 + RSI, RDI, R13, R14, R15, R8, + R9, R10, R11, R12, RCX +#else + R12, R13, R14, R15, RSI, RDI, + R8, R9, R10, R11, RCX +#endif + }; + *count = sizeof(allocation_order) / sizeof(X64Reg); + return allocation_order; +} + +void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty) +{ + // "dirty" can be false to avoid redundantly flushing an immediate when + // processing speculative constants. + DiscardRegContentsIfCached(preg); + m_regs[preg].away |= dirty; + m_regs[preg].location = Imm32(imm_value); +} + +BitSet32 GPRRegCache::GetRegUtilization() +{ + return m_jit.js.op->gprInReg; +} + +BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead) +{ + BitSet32 regs_used; + + for (u32 i = 1; i < lookahead; i++) + { + BitSet32 regs_in = m_jit.js.op[i].regsIn; + regs_used |= regs_in; + if (regs_in[preg]) + return regs_used; + } + + return regs_used; +} diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h new file mode 100644 index 0000000000..4e23268ece --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h @@ -0,0 +1,23 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "Core/PowerPC/Jit64/JitRegCache.h" + +class Jit64; + +class GPRRegCache final : public RegCache +{ +public: + explicit GPRRegCache(Jit64& jit); + + void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override; + void LoadRegister(size_t preg, Gen::X64Reg new_loc) override; + Gen::OpArg GetDefaultLocation(size_t reg) const override; + const Gen::X64Reg* GetAllocationOrder(size_t* count) override; + void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true); + BitSet32 GetRegUtilization() override; + BitSet32 CountRegsIn(size_t preg, u32 lookahead) override; +}; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 53a0c49998..0c10f85c02 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -950,7 +950,7 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const BitSet32 Jit64::CallerSavedRegistersInUse() const { BitSet32 result; - for (int i = 0; i < NUMXREGS; i++) + for (size_t i = 0; i < RegCache::NUM_XREGS; i++) { if (!gpr.IsFreeX(i)) result[i] = true; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 588d776fab..fc7e5d3522 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -21,6 +21,8 @@ #include "Common/CommonTypes.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64/FPURegCache.h" +#include "Core/PowerPC/Jit64/GPRRegCache.h" #include "Core/PowerPC/Jit64/JitAsm.h" #include "Core/PowerPC/Jit64/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" @@ -33,8 +35,8 @@ private: void AllocStack(); void FreeStack(); - GPRRegCache gpr; - FPURegCache fpr; + GPRRegCache gpr{*this}; + FPURegCache fpr{*this}; // The default code buffer. We keep it around to not have to alloc/dealloc a // large chunk of memory for each recompiled block. diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index 278d6a126c..071e6bebc9 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -14,30 +14,29 @@ #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" -#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PowerPC.h" using namespace Gen; using namespace PowerPC; -RegCache::RegCache() : emit(nullptr) +RegCache::RegCache(Jit64& jit) : m_jit{jit} { } void RegCache::Start() { - for (auto& xreg : xregs) + for (auto& xreg : m_xregs) { xreg.free = true; xreg.dirty = false; xreg.locked = false; xreg.ppcReg = INVALID_REG; } - for (size_t i = 0; i < regs.size(); i++) + for (size_t i = 0; i < m_regs.size(); i++) { - regs[i].location = GetDefaultLocation(i); - regs[i].away = false; - regs[i].locked = false; + m_regs[i].location = GetDefaultLocation(i); + m_regs[i].away = false; + m_regs[i].locked = false; } // todo: sort to find the most popular regs @@ -57,84 +56,220 @@ void RegCache::Start() // But only preload IF written OR reads >= 3 } +void RegCache::DiscardRegContentsIfCached(size_t preg) +{ + if (IsBound(preg)) + { + X64Reg xr = m_regs[preg].location.GetSimpleReg(); + m_xregs[xr].free = true; + m_xregs[xr].dirty = false; + m_xregs[xr].ppcReg = INVALID_REG; + m_regs[preg].away = false; + m_regs[preg].location = GetDefaultLocation(preg); + } +} + +void RegCache::SetEmitter(XEmitter* emitter) +{ + m_emitter = emitter; +} + +void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush) +{ + for (size_t i = 0; i < m_xregs.size(); i++) + { + if (m_xregs[i].locked) + PanicAlert("Someone forgot to unlock X64 reg %zu", i); + } + + for (unsigned int i : regsToFlush) + { + if (m_regs[i].locked) + { + PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i)); + } + + if (m_regs[i].away) + { + if (m_regs[i].location.IsSimpleReg() || m_regs[i].location.IsImm()) + { + StoreFromRegister(i, mode); + } + else + { + _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC); + } + } + } +} + +void RegCache::FlushR(X64Reg reg) +{ + if (reg >= m_xregs.size()) + PanicAlert("Flushing non existent reg"); + if (!m_xregs[reg].free) + { + StoreFromRegister(m_xregs[reg].ppcReg); + } +} + +void RegCache::FlushR(X64Reg reg, X64Reg reg2) +{ + FlushR(reg); + FlushR(reg2); +} + +void RegCache::FlushLockX(X64Reg reg) +{ + FlushR(reg); + LockX(reg); +} + +void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2) +{ + FlushR(reg1); + FlushR(reg2); + LockX(reg1); + LockX(reg2); +} + +int RegCache::SanityCheck() const +{ + for (size_t i = 0; i < m_regs.size(); i++) + { + if (m_regs[i].away) + { + if (m_regs[i].location.IsSimpleReg()) + { + Gen::X64Reg simple = m_regs[i].location.GetSimpleReg(); + if (m_xregs[simple].locked) + return 1; + if (m_xregs[simple].ppcReg != i) + return 2; + } + else if (m_regs[i].location.IsImm()) + { + return 3; + } + } + } + return 0; +} + +void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty) +{ + if (m_regs[preg].away) + { + if (m_regs[preg].location.IsImm()) + BindToRegister(preg, doLoad, makeDirty); + else if (m_regs[preg].location.IsSimpleReg()) + m_xregs[RX(preg)].dirty |= makeDirty; + } +} + +void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty) +{ + if (!m_regs[i].away || m_regs[i].location.IsImm()) + { + X64Reg xr = GetFreeXReg(); + if (m_xregs[xr].dirty) + PanicAlert("Xreg already dirty"); + if (m_xregs[xr].locked) + PanicAlert("GetFreeXReg returned locked register"); + m_xregs[xr].free = false; + m_xregs[xr].ppcReg = i; + m_xregs[xr].dirty = makeDirty || m_regs[i].away; + if (doLoad) + LoadRegister(i, xr); + for (size_t j = 0; j < m_regs.size(); j++) + { + if (i != j && m_regs[j].location.IsSimpleReg(xr)) + { + Crash(); + } + } + m_regs[i].away = true; + m_regs[i].location = ::Gen::R(xr); + } + else + { + // reg location must be simplereg; memory locations + // and immediates are taken care of above. + m_xregs[RX(i)].dirty |= makeDirty; + } + + if (m_xregs[RX(i)].locked) + { + PanicAlert("Seriously WTF, this reg should have been flushed"); + } +} + +void RegCache::StoreFromRegister(size_t i, FlushMode mode) +{ + if (m_regs[i].away) + { + bool doStore; + if (m_regs[i].location.IsSimpleReg()) + { + X64Reg xr = RX(i); + doStore = m_xregs[xr].dirty; + if (mode == FLUSH_ALL) + { + m_xregs[xr].free = true; + m_xregs[xr].ppcReg = INVALID_REG; + m_xregs[xr].dirty = false; + } + } + else + { + // must be immediate - do nothing + doStore = true; + } + OpArg newLoc = GetDefaultLocation(i); + if (doStore) + StoreRegister(i, newLoc); + if (mode == FLUSH_ALL) + { + m_regs[i].location = newLoc; + m_regs[i].away = false; + } + } +} + +const OpArg& RegCache::R(size_t preg) const +{ + return m_regs[preg].location; +} + +X64Reg RegCache::RX(size_t preg) const +{ + if (IsBound(preg)) + return m_regs[preg].location.GetSimpleReg(); + + PanicAlert("Unbound register - %zu", preg); + return Gen::INVALID_REG; +} + void RegCache::UnlockAll() { - for (auto& reg : regs) + for (auto& reg : m_regs) reg.locked = false; } void RegCache::UnlockAllX() { - for (auto& xreg : xregs) + for (auto& xreg : m_xregs) xreg.locked = false; } -BitSet32 GPRRegCache::GetRegUtilization() +bool RegCache::IsFreeX(size_t xreg) const { - return jit->js.op->gprInReg; + return m_xregs[xreg].free && !m_xregs[xreg].locked; } -BitSet32 FPURegCache::GetRegUtilization() +bool RegCache::IsBound(size_t preg) const { - return jit->js.op->gprInReg; -} - -BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead) -{ - BitSet32 regsUsed; - for (u32 i = 1; i < lookahead; i++) - { - BitSet32 regsIn = jit->js.op[i].regsIn; - regsUsed |= regsIn; - if (regsIn[preg]) - return regsUsed; - } - return regsUsed; -} - -BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead) -{ - BitSet32 regsUsed; - for (u32 i = 1; i < lookahead; i++) - { - BitSet32 regsIn = jit->js.op[i].fregsIn; - regsUsed |= regsIn; - if (regsIn[preg]) - return regsUsed; - } - return regsUsed; -} - -// Estimate roughly how bad it would be to de-allocate this register. Higher score -// means more bad. -float RegCache::ScoreRegister(X64Reg xr) -{ - size_t preg = xregs[xr].ppcReg; - float score = 0; - - // If it's not dirty, we don't need a store to write it back to the register file, so - // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly - // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative - // to the number of extra stores it causes. - if (xregs[xr].dirty) - score += 2; - - // If the register isn't actually needed in a physical register for a later instruction, - // writing it back to the register file isn't quite as bad. - if (GetRegUtilization()[preg]) - { - // Don't look too far ahead; we don't want to have quadratic compilation times for - // enormous block sizes! - // This actually improves register allocation a tiny bit; I'm not sure why. - u32 lookahead = std::min(jit->js.instructionsLeft, 64); - // Count how many other registers are going to be used before we need this one again. - u32 regs_in_count = CountRegsIn(preg, lookahead).Count(); - // Totally ad-hoc heuristic to bias based on how many other registers we'll need - // before this one gets used again. - score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count)); - } - - return score; + return m_regs[preg].away && m_regs[preg].location.IsSimpleReg(); } X64Reg RegCache::GetFreeXReg() @@ -144,7 +279,7 @@ X64Reg RegCache::GetFreeXReg() for (size_t i = 0; i < aCount; i++) { X64Reg xr = aOrder[i]; - if (!xregs[xr].locked && xregs[xr].free) + if (!m_xregs[xr].locked && m_xregs[xr].free) { return xr; } @@ -158,8 +293,8 @@ X64Reg RegCache::GetFreeXReg() for (size_t i = 0; i < aCount; i++) { X64Reg xreg = (X64Reg)aOrder[i]; - size_t preg = xregs[xreg].ppcReg; - if (xregs[xreg].locked || regs[preg].locked) + size_t preg = m_xregs[xreg].ppcReg; + if (m_xregs[xreg].locked || m_regs[preg].locked) continue; float score = ScoreRegister(xreg); if (score < min_score) @@ -181,232 +316,45 @@ X64Reg RegCache::GetFreeXReg() return INVALID_REG; } -void RegCache::FlushR(X64Reg reg) -{ - if (reg >= xregs.size()) - PanicAlert("Flushing non existent reg"); - if (!xregs[reg].free) - { - StoreFromRegister(xregs[reg].ppcReg); - } -} - -int RegCache::SanityCheck() const -{ - for (size_t i = 0; i < regs.size(); i++) - { - if (regs[i].away) - { - if (regs[i].location.IsSimpleReg()) - { - Gen::X64Reg simple = regs[i].location.GetSimpleReg(); - if (xregs[simple].locked) - return 1; - if (xregs[simple].ppcReg != i) - return 2; - } - else if (regs[i].location.IsImm()) - { - return 3; - } - } - } - return 0; -} - -void RegCache::DiscardRegContentsIfCached(size_t preg) -{ - if (IsBound(preg)) - { - X64Reg xr = regs[preg].location.GetSimpleReg(); - xregs[xr].free = true; - xregs[xr].dirty = false; - xregs[xr].ppcReg = INVALID_REG; - regs[preg].away = false; - regs[preg].location = GetDefaultLocation(preg); - } -} - -void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty) -{ - // "dirty" can be false to avoid redundantly flushing an immediate when - // processing speculative constants. - DiscardRegContentsIfCached(preg); - regs[preg].away |= dirty; - regs[preg].location = Imm32(immValue); -} - -const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count) -{ - static const X64Reg allocationOrder[] = { -// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into -// this. -#ifdef _WIN32 - RSI, RDI, R13, R14, R15, R8, - R9, R10, R11, R12, RCX -#else - R12, R13, R14, R15, RSI, RDI, - R8, R9, R10, R11, RCX -#endif - }; - *count = sizeof(allocationOrder) / sizeof(X64Reg); - return allocationOrder; -} - -const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) -{ - static const X64Reg allocationOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, - XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; - *count = sizeof(allocationOrder) / sizeof(X64Reg); - return allocationOrder; -} - -OpArg GPRRegCache::GetDefaultLocation(size_t reg) const -{ - return PPCSTATE(gpr[reg]); -} - -OpArg FPURegCache::GetDefaultLocation(size_t reg) const -{ - return PPCSTATE(ps[reg][0]); -} - -void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty) -{ - if (regs[preg].away) - { - if (regs[preg].location.IsImm()) - BindToRegister(preg, doLoad, makeDirty); - else if (regs[preg].location.IsSimpleReg()) - xregs[RX(preg)].dirty |= makeDirty; - } -} - -void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty) -{ - if (!regs[i].away || regs[i].location.IsImm()) - { - X64Reg xr = GetFreeXReg(); - if (xregs[xr].dirty) - PanicAlert("Xreg already dirty"); - if (xregs[xr].locked) - PanicAlert("GetFreeXReg returned locked register"); - xregs[xr].free = false; - xregs[xr].ppcReg = i; - xregs[xr].dirty = makeDirty || regs[i].away; - if (doLoad) - LoadRegister(i, xr); - for (size_t j = 0; j < regs.size(); j++) - { - if (i != j && regs[j].location.IsSimpleReg(xr)) - { - Crash(); - } - } - regs[i].away = true; - regs[i].location = ::Gen::R(xr); - } - else - { - // reg location must be simplereg; memory locations - // and immediates are taken care of above. - xregs[RX(i)].dirty |= makeDirty; - } - - if (xregs[RX(i)].locked) - { - PanicAlert("Seriously WTF, this reg should have been flushed"); - } -} - -void RegCache::StoreFromRegister(size_t i, FlushMode mode) -{ - if (regs[i].away) - { - bool doStore; - if (regs[i].location.IsSimpleReg()) - { - X64Reg xr = RX(i); - doStore = xregs[xr].dirty; - if (mode == FLUSH_ALL) - { - xregs[xr].free = true; - xregs[xr].ppcReg = INVALID_REG; - xregs[xr].dirty = false; - } - } - else - { - // must be immediate - do nothing - doStore = true; - } - OpArg newLoc = GetDefaultLocation(i); - if (doStore) - StoreRegister(i, newLoc); - if (mode == FLUSH_ALL) - { - regs[i].location = newLoc; - regs[i].away = false; - } - } -} - -void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc) -{ - emit->MOV(32, ::Gen::R(newLoc), regs[preg].location); -} - -void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc) -{ - emit->MOV(32, newLoc, regs[preg].location); -} - -void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc) -{ - emit->MOVAPD(newLoc, regs[preg].location); -} - -void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc) -{ - emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg()); -} - -void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush) -{ - for (size_t i = 0; i < xregs.size(); i++) - { - if (xregs[i].locked) - PanicAlert("Someone forgot to unlock X64 reg %zu", i); - } - - for (unsigned int i : regsToFlush) - { - if (regs[i].locked) - { - PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i)); - } - - if (regs[i].away) - { - if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm()) - { - StoreFromRegister(i, mode); - } - else - { - _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC); - } - } - } -} - int RegCache::NumFreeRegisters() { int count = 0; size_t aCount; const X64Reg* aOrder = GetAllocationOrder(&aCount); for (size_t i = 0; i < aCount; i++) - if (!xregs[aOrder[i]].locked && xregs[aOrder[i]].free) + if (!m_xregs[aOrder[i]].locked && m_xregs[aOrder[i]].free) count++; return count; } + +// Estimate roughly how bad it would be to de-allocate this register. Higher score +// means more bad. +float RegCache::ScoreRegister(X64Reg xr) +{ + size_t preg = m_xregs[xr].ppcReg; + float score = 0; + + // If it's not dirty, we don't need a store to write it back to the register file, so + // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly + // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative + // to the number of extra stores it causes. + if (m_xregs[xr].dirty) + score += 2; + + // If the register isn't actually needed in a physical register for a later instruction, + // writing it back to the register file isn't quite as bad. + if (GetRegUtilization()[preg]) + { + // Don't look too far ahead; we don't want to have quadratic compilation times for + // enormous block sizes! + // This actually improves register allocation a tiny bit; I'm not sure why. + u32 lookahead = std::min(m_jit.js.instructionsLeft, 64); + // Count how many other registers are going to be used before we need this one again. + u32 regs_in_count = CountRegsIn(preg, lookahead).Count(); + // Totally ad-hoc heuristic to bias based on how many other registers we'll need + // before this one gets used again. + score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count)); + } + + return score; +} diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h index f47e57e377..54608ff793 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h @@ -10,6 +10,8 @@ #include "Common/x64Emitter.h" #include "Core/PowerPC/PPCAnalyst.h" +class Jit64; + enum FlushMode { FLUSH_ALL, @@ -31,55 +33,31 @@ struct X64CachedReg bool locked; }; -typedef int XReg; -typedef int PReg; - -#define NUMXREGS 16 - class RegCache { -protected: - std::array regs; - std::array xregs; - - virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0; - - virtual BitSet32 GetRegUtilization() = 0; - virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0; - - Gen::XEmitter* emit; - - float ScoreRegister(Gen::X64Reg xreg); - public: - RegCache(); - virtual ~RegCache() {} + static constexpr size_t NUM_XREGS = 16; + + explicit RegCache(Jit64& jit); + virtual ~RegCache() = default; + + virtual void StoreRegister(size_t preg, const Gen::OpArg& new_loc) = 0; + virtual void LoadRegister(size_t preg, Gen::X64Reg new_loc) = 0; + virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0; + void Start(); void DiscardRegContentsIfCached(size_t preg); - void SetEmitter(Gen::XEmitter* emitter) { emit = emitter; } - void FlushR(Gen::X64Reg reg); - void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2) - { - FlushR(reg); - FlushR(reg2); - } - - void FlushLockX(Gen::X64Reg reg) - { - FlushR(reg); - LockX(reg); - } - void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2) - { - FlushR(reg1); - FlushR(reg2); - LockX(reg1); - LockX(reg2); - } + void SetEmitter(Gen::XEmitter* emitter); void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32)); - void Flush(PPCAnalyst::CodeOp* op) { Flush(); } + + void FlushR(Gen::X64Reg reg); + void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2); + + void FlushLockX(Gen::X64Reg reg); + void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2); + int SanityCheck() const; void KillImmediate(size_t preg, bool doLoad, bool makeDirty); @@ -87,19 +65,9 @@ public: // read only will not set dirty flag void BindToRegister(size_t preg, bool doLoad = true, bool makeDirty = true); void StoreFromRegister(size_t preg, FlushMode mode = FLUSH_ALL); - virtual void StoreRegister(size_t preg, const Gen::OpArg& newLoc) = 0; - virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0; - const Gen::OpArg& R(size_t preg) const { return regs[preg].location; } - Gen::X64Reg RX(size_t preg) const - { - if (IsBound(preg)) - return regs[preg].location.GetSimpleReg(); - - PanicAlert("Unbound register - %zu", preg); - return Gen::INVALID_REG; - } - virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0; + const Gen::OpArg& R(size_t preg) const; + Gen::X64Reg RX(size_t preg) const; // Register locking. @@ -107,7 +75,7 @@ public: template void Lock(T p) { - regs[p].locked = true; + m_regs[p].locked = true; } template void Lock(T first, Args... args) @@ -120,9 +88,9 @@ public: template void LockX(T x) { - if (xregs[x].locked) + if (m_xregs[x].locked) PanicAlert("RegCache: x %i already locked!", x); - xregs[x].locked = true; + m_xregs[x].locked = true; } template void LockX(T first, Args... args) @@ -134,9 +102,9 @@ public: template void UnlockX(T x) { - if (!xregs[x].locked) + if (!m_xregs[x].locked) PanicAlert("RegCache: x %i already unlocked!", x); - xregs[x].locked = false; + m_xregs[x].locked = false; } template void UnlockX(T first, Args... args) @@ -148,31 +116,22 @@ public: void UnlockAll(); void UnlockAllX(); - bool IsFreeX(size_t xreg) const { return xregs[xreg].free && !xregs[xreg].locked; } - bool IsBound(size_t preg) const { return regs[preg].away && regs[preg].location.IsSimpleReg(); } + bool IsFreeX(size_t xreg) const; + bool IsBound(size_t preg) const; + Gen::X64Reg GetFreeXReg(); int NumFreeRegisters(); -}; -class GPRRegCache final : public RegCache -{ -public: - void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override; - void LoadRegister(size_t preg, Gen::X64Reg newLoc) override; - Gen::OpArg GetDefaultLocation(size_t reg) const override; - const Gen::X64Reg* GetAllocationOrder(size_t* count) override; - void SetImmediate32(size_t preg, u32 immValue, bool dirty = true); - BitSet32 GetRegUtilization() override; - BitSet32 CountRegsIn(size_t preg, u32 lookahead) override; -}; +protected: + virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0; -class FPURegCache final : public RegCache -{ -public: - void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override; - void LoadRegister(size_t preg, Gen::X64Reg newLoc) override; - const Gen::X64Reg* GetAllocationOrder(size_t* count) override; - Gen::OpArg GetDefaultLocation(size_t reg) const override; - BitSet32 GetRegUtilization() override; - BitSet32 CountRegsIn(size_t preg, u32 lookahead) override; + virtual BitSet32 GetRegUtilization() = 0; + virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0; + + float ScoreRegister(Gen::X64Reg xreg); + + Jit64& m_jit; + std::array m_regs; + std::array m_xregs; + Gen::XEmitter* m_emitter = nullptr; };