diff --git a/Source/Core/Common/BitSet.h b/Source/Core/Common/BitSet.h index 4425b6067c..5154735f86 100644 --- a/Source/Core/Common/BitSet.h +++ b/Source/Core/Common/BitSet.h @@ -193,10 +193,14 @@ public: constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); } constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); } constexpr BitSet operator~() const { return BitSet(~m_val); } + constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); } + constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); } constexpr explicit operator bool() const { return m_val != 0; } BitSet& operator|=(BitSet other) { return *this = *this | other; } BitSet& operator&=(BitSet other) { return *this = *this & other; } BitSet& operator^=(BitSet other) { return *this = *this ^ other; } + BitSet& operator<<=(IntTy shift) { return *this = *this << shift; } + BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; } // Warning: Even though on modern CPUs this is a single fast instruction, // Dolphin's official builds do not currently assume POPCNT support on x86, // so slower explicit bit twiddling is generated. Still should generally diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 23752002cf..409546e43b 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -159,6 +159,7 @@ + <ClInclude Include="VariantUtil.h" /> diff --git a/Source/Core/Common/VariantUtil.h b/Source/Core/Common/VariantUtil.h new file mode 100644 index 0000000000..c865ad9681 --- /dev/null +++ b/Source/Core/Common/VariantUtil.h @@ -0,0 +1,26 @@ +// Copyright 2018 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include <variant> + +namespace detail +{ +template <typename... From> +struct VariantCastProxy +{ + const std::variant<From...>& v; + + template <typename... To> + operator std::variant<To...>() const + { + return std::visit([](auto&& arg) { return std::variant<To...>{arg}; }, v); + } +}; +} // namespace detail + +template <typename... From> +auto VariantCast(const std::variant<From...>& v) +{ + return detail::VariantCastProxy<From...>{v}; +} diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 003f2122ec..30ae5e0946 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -236,20 +236,20 @@ if(_M_X86) DSP/Jit/x64/DSPJitMultiplier.cpp DSP/Jit/x64/DSPJitTables.cpp DSP/Jit/x64/DSPJitUtil.cpp - PowerPC/Jit64/FPURegCache.cpp - PowerPC/Jit64/GPRRegCache.cpp - PowerPC/Jit64/Jit64_Tables.cpp - PowerPC/Jit64/JitAsm.cpp - PowerPC/Jit64/Jit_Branch.cpp PowerPC/Jit64/Jit.cpp + PowerPC/Jit64/Jit64_Tables.cpp + PowerPC/Jit64/Jit_Branch.cpp PowerPC/Jit64/Jit_FloatingPoint.cpp PowerPC/Jit64/Jit_Integer.cpp PowerPC/Jit64/Jit_LoadStore.cpp PowerPC/Jit64/Jit_LoadStoreFloating.cpp PowerPC/Jit64/Jit_LoadStorePaired.cpp PowerPC/Jit64/Jit_Paired.cpp - PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp + PowerPC/Jit64/JitAsm.cpp + PowerPC/Jit64/RegCache/FPURegCache.cpp + PowerPC/Jit64/RegCache/GPRRegCache.cpp + PowerPC/Jit64/RegCache/JitRegCache.cpp PowerPC/Jit64Common/BlockCache.cpp PowerPC/Jit64Common/ConstantPool.cpp PowerPC/Jit64Common/EmuCodeBlock.cpp diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index ae2b84308d..516848d8c2 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -241,8 +241,8 @@ - + @@ -260,13 +260,8 @@ - - - - - @@ -275,7 +270,12 @@ + + + + + @@ -284,10 +284,6 @@ - - - - @@ -295,6 +291,10 @@ + + + +
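
A minimal usage sketch of the new VariantCast helper (the types below are illustrative, not from this patch): it widens a std::variant into a variant with a superset of alternatives, a conversion std::variant does not provide directly. The proxy's templated conversion operator deduces the destination alternatives and re-wraps the held value via std::visit.

```cpp
#include <string>
#include <variant>

#include "Common/VariantUtil.h"

void Example()
{
  std::variant<int, std::string> narrow = 42;
  // Direct initialization from `narrow` would not compile; VariantCast
  // returns a proxy whose conversion operator visits the source variant
  // and re-wraps the held value in the wider variant type.
  std::variant<int, float, std::string> wide = VariantCast(narrow);
  (void)wide;
}
```

diff --git 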
a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index c4cfefd77a..2cdad6e54d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -27,7 +27,7 @@ #include "Core/HW/ProcessorInterface.h" #include "Core/PatchEngine.h" #include "Core/PowerPC/Jit64/JitAsm.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" @@ -756,8 +756,6 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) js.downcountAmount += opinfo->numCycles; js.fastmemLoadStore = nullptr; js.fixupExceptionHandler = false; - js.revertGprLoad = -1; - js.revertFprLoad = -1; if (!SConfig::GetInstance().bEnableDebugging) js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC); @@ -800,13 +798,17 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = J_CC(CC_Z, true); - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); - MOV(32, PPCSTATE(pc), Imm32(op.address)); - WriteExternalExceptionExit(); + gpr.Flush(); + fpr.Flush(); + + MOV(32, PPCSTATE(pc), Imm32(op.address)); + WriteExternalExceptionExit(); + } SwitchToNearCode(); - SetJumpTarget(noCPInt); SetJumpTarget(noExtIntEnable); } @@ -824,14 +826,19 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) SwitchToFarCode(); SetJumpTarget(b1); - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); - // If a FPU exception occurs, the exception handler will read - // from PC. Update PC with the latest value in case that happens. - MOV(32, PPCSTATE(pc), Imm32(op.address)); - OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); - WriteExceptionExit(); + gpr.Flush(); + fpr.Flush(); + + // If a FPU exception occurs, the exception handler will read + // from PC. Update PC with the latest value in case that happens. + MOV(32, PPCSTATE(pc), Imm32(op.address)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + WriteExceptionExit(); + } SwitchToNearCode(); js.firstFPInstructionFound = true; @@ -866,20 +873,8 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) // output, which needs to be bound in the actual instruction compilation. // TODO: make this smarter in the case that we're actually register-starved, i.e. // prioritize the more important registers. 
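
The RCForkGuard objects used in the hunks above are new in this change, and their implementation is not visible here, so the following is only a plausible sketch of the idea; SaveState, LoadState, and RegCacheState are hypothetical names, not the patch's actual API. Exception exits are emitted in far code, and flushing registers there must not disturb the cache state that the fall-through path keeps compiling with, so the cache is forked: the guard snapshots the state and restores it when it goes out of scope.

```cpp
// Plausible sketch only, not the implementation from this patch.
class RCForkGuard
{
public:
  explicit RCForkGuard(RegCache& cache)
      : m_cache(cache), m_state(cache.SaveState())  // hypothetical snapshot call
  {
  }

  ~RCForkGuard()
  {
    // Roll back: flushes emitted on the cold path do not leak into the
    // register-cache state of the fall-through path.
    m_cache.LoadState(m_state);  // hypothetical restore call
  }

private:
  RegCache& m_cache;
  RegCacheState m_state;  // hypothetical value-type snapshot of the cache
};
```
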
- for (int reg : op.regsIn) - { - if (gpr.NumFreeRegisters() < 2) - break; - if (op.gprInReg[reg] && !gpr.R(reg).IsImm()) - gpr.BindToRegister(reg, true, false); - } - for (int reg : op.fregsIn) - { - if (fpr.NumFreeRegisters() < 2) - break; - if (op.fprInXmm[reg]) - fpr.BindToRegister(reg, true, false); - } + gpr.PreloadRegisters(op.regsIn & op.gprInReg); + fpr.PreloadRegisters(op.fregsIn & op.fprInXmm); CompileInstruction(op); @@ -908,24 +903,25 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr(); } - BitSet32 gprToFlush = BitSet32::AllTrue(32); - BitSet32 fprToFlush = BitSet32::AllTrue(32); - if (js.revertGprLoad >= 0) - gprToFlush[js.revertGprLoad] = false; - if (js.revertFprLoad >= 0) - fprToFlush[js.revertFprLoad] = false; - gpr.Flush(RegCache::FlushMode::MaintainState, gprToFlush); - fpr.Flush(RegCache::FlushMode::MaintainState, fprToFlush); + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + + gpr.Revert(); + fpr.Revert(); + gpr.Flush(); + fpr.Flush(); + MOV(32, PPCSTATE(pc), Imm32(op.address)); WriteExceptionExit(); SwitchToNearCode(); } + gpr.Commit(); + fpr.Commit(); + // If we have a register that will never be used again, flush it. - for (int j : ~op.gprInUse) - gpr.StoreFromRegister(j); - for (int j : ~op.fprInUse) - fpr.StoreFromRegister(j); + gpr.Flush(~op.gprInUse); + fpr.Flush(~op.fprInUse); if (opinfo->flags & FL_LOADSTORE) ++js.numLoadStoreInst; @@ -969,15 +965,8 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const BitSet32 Jit64::CallerSavedRegistersInUse() const { - BitSet32 result; - for (size_t i = 0; i < RegCache::NUM_XREGS; i++) - { - if (!gpr.IsFreeX(i)) - result[i] = true; - if (!fpr.IsFreeX(i)) - result[16 + i] = true; - } - return result & ABI_ALL_CALLER_SAVED; + BitSet32 in_use = gpr.RegistersInUse() | (fpr.RegistersInUse() << 16); + return in_use & ABI_ALL_CALLER_SAVED; } void Jit64::EnableBlockLink() diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 068d42290c..35b356f32d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -21,10 +21,10 @@ #include "Common/CommonTypes.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" -#include "Core/PowerPC/Jit64/FPURegCache.h" -#include "Core/PowerPC/Jit64/GPRRegCache.h" #include "Core/PowerPC/Jit64/JitAsm.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h" +#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" #include "Core/PowerPC/JitCommon/JitCache.h" @@ -88,10 +88,8 @@ public: void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(bool ca); - void ComputeRC(const Gen::OpArg& arg, bool needs_test = true, bool needs_sext = true); + void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true); - // Use to extract bytes from a register using the regcache. offset is in bytes. 
- Gen::OpArg ExtractFromReg(int reg, int offset); void AndWithMask(Gen::X64Reg reg, u32 mask); bool CheckMergedBranch(u32 crf) const; void DoMergedBranch(); diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp deleted file mode 100644 index 3b65768db3..0000000000 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/Jit64/JitRegCache.h" - -#include <algorithm> -#include <cmath> -#include <limits> -#include <type_traits> - -#include "Common/Assert.h" -#include "Common/BitSet.h" -#include "Common/CommonTypes.h" -#include "Common/MsgHandler.h" -#include "Common/x64Emitter.h" -#include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/PowerPC.h" - -using namespace Gen; -using namespace PowerPC; - -RegCache::RegCache(Jit64& jit) : m_jit{jit} -{ -} - -void RegCache::Start() -{ - m_xregs.fill({}); - for (size_t i = 0; i < m_regs.size(); i++) - { - m_regs[i] = PPCCachedReg{GetDefaultLocation(i)}; - } -} - -void RegCache::DiscardRegContentsIfCached(preg_t preg) -{ - if (m_regs[preg].IsBound()) - { - X64Reg xr = m_regs[preg].Location().GetSimpleReg(); - m_xregs[xr].SetFlushed(); - m_regs[preg].SetFlushed(); - } -} - -void RegCache::SetEmitter(XEmitter* emitter) -{ - m_emitter = emitter; -} - -void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush) -{ - ASSERT_MSG( - DYNA_REC, - std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }), - "Someone forgot to unlock a X64 reg"); - - for (unsigned int i : regsToFlush) - { - ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), "Someone forgot to unlock PPC reg %u (X64 reg %i).", - i, RX(i)); - - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - break; - case PPCCachedReg::LocationType::SpeculativeImmediate: - // We can have a cached value without a host register through speculative constants. - // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE, - // if PPCSTATE is modified externally (e.g. fallback to interpreter).
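
For readers following this switch: the four PPCCachedReg states it distinguishes are defined in JitRegCache.h, quoted later in this diff. In summary:

```cpp
// Summary of PPCCachedReg::LocationType (see the enum further down):
//   Default              - value lives only at its default location in PPCSTATE
//   Bound                - value lives in a host x64 register
//   Immediate            - known constant, not yet written back to PPCSTATE
//   SpeculativeImmediate - known constant that already matches PPCSTATE
// Only SpeculativeImmediate can be dropped without emitting a store, which is
// why the case below simply clears the cached value.
```
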
- m_regs[i].SetFlushed(); - break; - case PPCCachedReg::LocationType::Bound: - case PPCCachedReg::LocationType::Immediate: - StoreFromRegister(i, mode); - break; - } - } -} - -void RegCache::FlushLockX(X64Reg reg) -{ - FlushX(reg); - LockX(reg); -} - -void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2) -{ - FlushX(reg1); - FlushX(reg2); - LockX(reg1); - LockX(reg2); -} - -bool RegCache::SanityCheck() const -{ - for (size_t i = 0; i < m_regs.size(); i++) - { - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::SpeculativeImmediate: - case PPCCachedReg::LocationType::Immediate: - break; - case PPCCachedReg::LocationType::Bound: - { - if (m_regs[i].IsLocked()) - return false; - - Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg(); - if (m_xregs[xr].IsLocked()) - return false; - if (m_xregs[xr].Contents() != i) - return false; - break; - } - } - } - return true; -} - -void RegCache::KillImmediate(preg_t preg, bool doLoad, bool makeDirty) -{ - switch (m_regs[preg].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::SpeculativeImmediate: - break; - case PPCCachedReg::LocationType::Bound: - if (makeDirty) - m_xregs[RX(preg)].MakeDirty(); - break; - case PPCCachedReg::LocationType::Immediate: - BindToRegister(preg, doLoad, makeDirty); - break; - } -} - -void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) -{ - if (!m_regs[i].IsBound()) - { - X64Reg xr = GetFreeXReg(); - - ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr); - ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register"); - - m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway()); - - if (doLoad) - { - LoadRegister(i, xr); - } - - ASSERT_MSG(DYNA_REC, - std::none_of(m_regs.begin(), m_regs.end(), - [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }), - "Xreg %i already bound", xr); - - m_regs[i].SetBoundTo(xr); - } - else - { - // reg location must be simplereg; memory locations - // and immediates are taken care of above. 
- if (makeDirty) - m_xregs[RX(i)].MakeDirty(); - } - - ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed"); -} - -void RegCache::StoreFromRegister(preg_t i, FlushMode mode) -{ - bool doStore = false; - - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::SpeculativeImmediate: - return; - case PPCCachedReg::LocationType::Bound: - { - X64Reg xr = RX(i); - doStore = m_xregs[xr].IsDirty(); - if (mode == FlushMode::All) - m_xregs[xr].SetFlushed(); - break; - } - case PPCCachedReg::LocationType::Immediate: - doStore = true; - break; - } - - if (doStore) - StoreRegister(i, GetDefaultLocation(i)); - if (mode == FlushMode::All) - m_regs[i].SetFlushed(); -} - -const OpArg& RegCache::R(preg_t preg) const -{ - return m_regs[preg].Location(); -} - -X64Reg RegCache::RX(preg_t preg) const -{ - ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg); - return m_regs[preg].Location().GetSimpleReg(); -} - -void RegCache::UnlockAll() -{ - for (auto& reg : m_regs) - reg.Unlock(); -} - -void RegCache::UnlockAllX() -{ - for (auto& xreg : m_xregs) - xreg.Unlock(); -} - -bool RegCache::IsFreeX(size_t xreg) const -{ - return m_xregs[xreg].IsFree(); -} - -X64Reg RegCache::GetFreeXReg() -{ - size_t aCount; - const X64Reg* aOrder = GetAllocationOrder(&aCount); - for (size_t i = 0; i < aCount; i++) - { - X64Reg xr = aOrder[i]; - if (m_xregs[xr].IsFree()) - { - return xr; - } - } - - // Okay, not found; run the register allocator heuristic and figure out which register we should - // clobber. - float min_score = std::numeric_limits<float>::max(); - X64Reg best_xreg = INVALID_REG; - size_t best_preg = 0; - for (size_t i = 0; i < aCount; i++) - { - X64Reg xreg = (X64Reg)aOrder[i]; - preg_t preg = m_xregs[xreg].Contents(); - if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked()) - continue; - float score = ScoreRegister(xreg); - if (score < min_score) - { - min_score = score; - best_xreg = xreg; - best_preg = preg; - } - } - - if (best_xreg != INVALID_REG) - { - StoreFromRegister(best_preg); - return best_xreg; - } - - // Still no dice? Die! - ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs"); - return INVALID_REG; -} - -int RegCache::NumFreeRegisters() const -{ - int count = 0; - size_t aCount; - const X64Reg* aOrder = GetAllocationOrder(&aCount); - for (size_t i = 0; i < aCount; i++) - if (m_xregs[aOrder[i]].IsFree()) - count++; - return count; -} - -void RegCache::FlushX(X64Reg reg) -{ - ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg); - ASSERT(!m_xregs[reg].IsLocked()); - if (!m_xregs[reg].IsFree()) - { - StoreFromRegister(m_xregs[reg].Contents()); - } -} - -// Estimate roughly how bad it would be to de-allocate this register. Higher score -// means more bad. -float RegCache::ScoreRegister(X64Reg xreg) const -{ - preg_t preg = m_xregs[xreg].Contents(); - float score = 0; - - // If it's not dirty, we don't need a store to write it back to the register file, so - // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly - // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative - // to the number of extra stores it causes. - if (m_xregs[xreg].IsDirty()) - score += 2; - - // If the register isn't actually needed in a physical register for a later instruction, - // writing it back to the register file isn't quite as bad. 
if (GetRegUtilization()[preg]) - { - // Don't look too far ahead; we don't want to have quadratic compilation times for - // enormous block sizes! - // This actually improves register allocation a tiny bit; I'm not sure why. - u32 lookahead = std::min(m_jit.js.instructionsLeft, 64); - // Count how many other registers are going to be used before we need this one again. - u32 regs_in_count = CountRegsIn(preg, lookahead).Count(); - // Totally ad-hoc heuristic to bias based on how many other registers we'll need - // before this one gets used again. - score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count)); - } - - return score; -} diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h deleted file mode 100644 index 9c59cab388..0000000000 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <cstddef> - -#include "Common/Assert.h" -#include "Common/x64Emitter.h" -#include "Core/PowerPC/PPCAnalyst.h" - -class Jit64; - -using preg_t = size_t; - -class PPCCachedReg -{ -public: - enum class LocationType - { - /// Value is currently at its default location - Default, - /// Value is currently bound to a x64 register - Bound, - /// Value is known as an immediate and has not been written back to its default location - Immediate, - /// Value is known as an immediate and is already present at its default location - SpeculativeImmediate, - }; - - PPCCachedReg() = default; - - explicit PPCCachedReg(Gen::OpArg default_location_) - : default_location(default_location_), location(default_location_) - { - } - - const Gen::OpArg& Location() const { return location; } - - LocationType GetLocationType() const - { - if (!away) - { - if (location.IsImm()) - return LocationType::SpeculativeImmediate; - - ASSERT(location == default_location); - return LocationType::Default; - } - - ASSERT(location.IsImm() || location.IsSimpleReg()); - return location.IsImm() ? 
LocationType::Immediate : LocationType::Bound; - } - - bool IsAway() const { return away; } - bool IsBound() const { return GetLocationType() == LocationType::Bound; } - - void SetBoundTo(Gen::X64Reg xreg) - { - away = true; - location = Gen::R(xreg); - } - - void SetFlushed() - { - away = false; - location = default_location; - } - - void SetToImm32(u32 imm32, bool dirty = true) - { - away |= dirty; - location = Gen::Imm32(imm32); - } - - bool IsLocked() const { return locked; } - void Lock() { locked = true; } - void Unlock() { locked = false; } - -private: - Gen::OpArg default_location{}; - Gen::OpArg location{}; - bool away = false; // value not in source register - bool locked = false; -}; - -class X64CachedReg -{ -public: - preg_t Contents() const { return ppcReg; } - - void SetBoundTo(preg_t ppcReg_, bool dirty_) - { - free = false; - ppcReg = ppcReg_; - dirty = dirty_; - } - - void SetFlushed() - { - ppcReg = static_cast<preg_t>(Gen::INVALID_REG); - free = true; - dirty = false; - } - - bool IsFree() const { return free && !locked; } - - bool IsDirty() const { return dirty; } - void MakeDirty() { dirty = true; } - - bool IsLocked() const { return locked; } - void Lock() { locked = true; } - void Unlock() { locked = false; } - -private: - preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG); - bool free = true; - bool dirty = false; - bool locked = false; -}; - -class RegCache -{ -public: - enum class FlushMode - { - All, - MaintainState, - }; - - static constexpr size_t NUM_XREGS = 16; - - explicit RegCache(Jit64& jit); - virtual ~RegCache() = default; - - virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0; - - void Start(); - - void DiscardRegContentsIfCached(preg_t preg); - void SetEmitter(Gen::XEmitter* emitter); - - void Flush(FlushMode mode = FlushMode::All, BitSet32 regsToFlush = BitSet32::AllTrue(32)); - - void FlushLockX(Gen::X64Reg reg); - void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2); - - bool SanityCheck() const; - void KillImmediate(preg_t preg, bool doLoad, bool makeDirty); - - // TODO - instead of doload, use "read", "write" - // read only will not set dirty flag - void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true); - void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::All); - - const Gen::OpArg& R(preg_t preg) const; - Gen::X64Reg RX(preg_t preg) const; - - // Register locking. - - // these are powerpc reg indices - template <typename T> - void Lock(T p) - { - m_regs[p].Lock(); - } - template <typename T, typename... Args> - void Lock(T first, Args... args) - { - Lock(first); - Lock(args...); - } - - // these are x64 reg indices - template <typename T> - void LockX(T x) - { - if (m_xregs[x].IsLocked()) - PanicAlert("RegCache: x %i already locked!", x); - m_xregs[x].Lock(); - } - template <typename T, typename... Args> - void LockX(T first, Args... args) - { - LockX(first); - LockX(args...); - } - - template <typename T> - void UnlockX(T x) - { - if (!m_xregs[x].IsLocked()) - PanicAlert("RegCache: x %i already unlocked!", x); - m_xregs[x].Unlock(); - } - template <typename T, typename... Args> - void UnlockX(T first, Args... 
args) - { - UnlockX(first); - UnlockX(args...); - } - - void UnlockAll(); - void UnlockAllX(); - - bool IsFreeX(size_t xreg) const; - - Gen::X64Reg GetFreeXReg(); - int NumFreeRegisters() const; - -protected: - virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0; - virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0; - - virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0; - - virtual BitSet32 GetRegUtilization() const = 0; - virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0; - - void FlushX(Gen::X64Reg reg); - - float ScoreRegister(Gen::X64Reg xreg) const; - - Jit64& m_jit; - std::array<PPCCachedReg, 32> m_regs; - std::array<X64CachedReg, NUM_XREGS> m_xregs; - Gen::XEmitter* m_emitter = nullptr; -}; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 358e16cef9..7e0ded66c8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -8,7 +8,7 @@ #include "Core/CoreTiming.h" #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" @@ -160,9 +160,13 @@ void Jit64::bcx(UGeckoInstruction inst) else destination = js.compilerPC + SignExt16(inst.BD << 2); - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); - WriteExit(destination, inst.LK, js.compilerPC + 4); + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + gpr.Flush(); + fpr.Flush(); + WriteExit(destination, inst.LK, js.compilerPC + 4); + } if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget(pConditionDontBranch); @@ -215,10 +219,14 @@ void Jit64::bcctrx(UGeckoInstruction inst) if (inst.LK_3) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); - WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); - // Would really like to continue the block here, but it ends. TODO. + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + gpr.Flush(); + fpr.Flush(); + WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); + // Would really like to continue the block here, but it ends. TODO. 
+ } SetJumpTarget(b); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) @@ -269,9 +277,13 @@ void Jit64::bclrx(UGeckoInstruction inst) if (inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); - WriteBLRExit(); + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + gpr.Flush(); + fpr.Flush(); + WriteBLRExit(); + } if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget(pConditionDontBranch); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 15a3588ff7..fcf65ee9b8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -12,7 +12,7 @@ #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" @@ -76,7 +76,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re std::vector<FixupBranch> fixups; for (u32 x : inputs) { - MOVDDUP(xmm, fpr.R(x)); + RCOpArg Rx = fpr.Use(x, RCMode::Read); + RegCache::Realize(Rx); + MOVDDUP(xmm, Rx); UCOMISD(xmm, R(xmm)); fixups.push_back(J_CC(CC_P)); } @@ -102,8 +104,10 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re BLENDVPD(xmm, MConst(psGeneratedQNaN)); for (u32 x : inputs) { - avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD); - BLENDVPD(xmm, fpr.R(x)); + RCOpArg Rx = fpr.Use(x, RCMode::Read); + RegCache::Realize(Rx); + avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD); + BLENDVPD(xmm, Rx); } FixupBranch done = J(true); SwitchToNearCode(); @@ -112,8 +116,8 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re else { // SSE2 fallback - X64Reg tmp = fpr.GetFreeXReg(); - fpr.FlushLockX(tmp); + RCX64Reg tmp = fpr.Scratch(); + RegCache::Realize(tmp); MOVAPD(clobber, R(xmm)); CMPPD(clobber, R(clobber), CMP_UNORD); MOVMSKPD(RSCRATCH, R(clobber)); @@ -125,20 +129,21 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re ANDNPD(clobber, R(xmm)); ANDPD(tmp, MConst(psGeneratedQNaN)); ORPD(tmp, R(clobber)); - MOVAPD(xmm, R(tmp)); + MOVAPD(xmm, tmp); for (u32 x : inputs) { - MOVAPD(clobber, fpr.R(x)); + RCOpArg Rx = fpr.Use(x, RCMode::Read); + RegCache::Realize(Rx); + MOVAPD(clobber, Rx); CMPPD(clobber, R(clobber), CMP_ORD); MOVAPD(tmp, R(clobber)); - ANDNPD(clobber, fpr.R(x)); - ANDPD(xmm, R(tmp)); + ANDNPD(clobber, Rx); + ANDPD(xmm, tmp); ORPD(xmm, R(clobber)); } FixupBranch done = J(true); SwitchToNearCode(); SetJumpTarget(done); - fpr.UnlockX(tmp); } } if (xmm_out != xmm) @@ -172,53 +177,55 @@ void Jit64::fp_arith(UGeckoInstruction inst) bool round_input = single && !js.op->fprIsSingle[inst.FC]; bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs; - const auto fp_tri_op = [&](int d, int a, int b, bool reversible, + const auto fp_tri_op = [&](int op1, int op2, bool reversible, void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) { - fpr.Lock(d, a, b); - fpr.BindToRegister(d, d == a || d == b || !single); - X64Reg dest = preserve_inputs ? 
XMM1 : fpr.RX(d); + RCX64Reg Rd = fpr.Bind(d, !single ? RCMode::ReadWrite : RCMode::Write); + RCOpArg Rop1 = fpr.Use(op1, RCMode::Read); + RCOpArg Rop2 = fpr.Use(op2, RCMode::Read); + RegCache::Realize(Rd, Rop1, Rop2); + + X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd); if (roundRHS) { - if (d == a && !preserve_inputs) + if (d == op1 && !preserve_inputs) { - Force25BitPrecision(XMM0, fpr.R(b), XMM1); - (this->*sseOp)(fpr.RX(d), R(XMM0)); + Force25BitPrecision(XMM0, Rop2, XMM1); + (this->*sseOp)(Rd, R(XMM0)); } else { - Force25BitPrecision(dest, fpr.R(b), XMM0); - (this->*sseOp)(dest, fpr.R(a)); + Force25BitPrecision(dest, Rop2, XMM0); + (this->*sseOp)(dest, Rop1); } } else { - avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), packed, reversible); + avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible); } - HandleNaNs(inst, fpr.RX(d), dest); + HandleNaNs(inst, Rd, dest); if (single) - ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); + ForceSinglePrecision(Rd, Rd, packed, true); + SetFPRFIfNeeded(Rd); }; switch (inst.SUBOP5) { case 18: - fp_tri_op(d, a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD, + fp_tri_op(a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD, packed ? &XEmitter::DIVPD : &XEmitter::DIVSD); break; case 20: - fp_tri_op(d, a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD, + fp_tri_op(a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD, packed ? &XEmitter::SUBPD : &XEmitter::SUBSD); break; case 21: - fp_tri_op(d, a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD, + fp_tri_op(a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD, packed ? &XEmitter::ADDPD : &XEmitter::ADDSD); break; case 25: - fp_tri_op(d, a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD, + fp_tri_op(a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD, packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input); break; default: @@ -241,17 +248,32 @@ void Jit64::fmaddXX(UGeckoInstruction inst) bool packed = inst.OPCD == 4 || (!cpu_info.bAtom && single && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]); - fpr.Lock(a, b, c, d); + // While we don't know if any games are actually affected (replays seem to work with all the usual + // suspects for desyncing), netplay and other applications need absolute perfect determinism, so + // be extra careful and don't use FMA, even if in theory it might be okay. + // Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared + // to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin + // instances on different computers giving identical results. + const bool use_fma = cpu_info.bFMA && !Core::WantsDeterminism(); + + // For use_fma == true: + // Statistics suggests b is a lot less likely to be unbound in practice, so + // if we have to pick one of a or b to bind, let's make it b. + RCOpArg Ra = fpr.Use(a, RCMode::Read); + RCOpArg Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read); + RCOpArg Rc = fpr.Use(c, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, single ? 
RCMode::Write : RCMode::ReadWrite); + RegCache::Realize(Ra, Rb, Rc, Rd); switch (inst.SUBOP5) { case 14: - MOVDDUP(XMM1, fpr.R(c)); + MOVDDUP(XMM1, Rc); if (round_input) Force25BitPrecision(XMM1, R(XMM1), XMM0); break; case 15: - avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3); + avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3); if (round_input) Force25BitPrecision(XMM1, R(XMM1), XMM0); break; @@ -260,38 +282,29 @@ void Jit64::fmaddXX(UGeckoInstruction inst) X64Reg tmp1 = special ? XMM0 : XMM1; X64Reg tmp2 = special ? XMM1 : XMM0; if (single && round_input) - Force25BitPrecision(tmp1, fpr.R(c), tmp2); + Force25BitPrecision(tmp1, Rc, tmp2); else - MOVAPD(tmp1, fpr.R(c)); + MOVAPD(tmp1, Rc); break; } - // While we don't know if any games are actually affected (replays seem to work with all the usual - // suspects for desyncing), netplay and other applications need absolute perfect determinism, so - // be extra careful and don't use FMA, even if in theory it might be okay. - // Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared - // to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin - // instances on different computers giving identical results. - if (cpu_info.bFMA && !Core::WantsDeterminism()) + if (use_fma) { - // Statistics suggests b is a lot less likely to be unbound in practice, so - // if we have to pick one of a or b to bind, let's make it b. - fpr.BindToRegister(b, true, false); switch (inst.SUBOP5) { case 28: // msub if (packed) - VFMSUB132PD(XMM1, fpr.RX(b), fpr.R(a)); + VFMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra); else - VFMSUB132SD(XMM1, fpr.RX(b), fpr.R(a)); + VFMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra); break; case 14: // madds0 case 15: // madds1 case 29: // madd if (packed) - VFMADD132PD(XMM1, fpr.RX(b), fpr.R(a)); + VFMADD132PD(XMM1, Rb.GetSimpleReg(), Ra); else - VFMADD132SD(XMM1, fpr.RX(b), fpr.R(a)); + VFMADD132SD(XMM1, Rb.GetSimpleReg(), Ra); break; // PowerPC and x86 define NMADD/NMSUB differently // x86: D = -A*C (+/-) B @@ -299,15 +312,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst) // so we have to swap them; the ADD/SUB here isn't a typo. case 30: // nmsub if (packed) - VFNMADD132PD(XMM1, fpr.RX(b), fpr.R(a)); + VFNMADD132PD(XMM1, Rb.GetSimpleReg(), Ra); else - VFNMADD132SD(XMM1, fpr.RX(b), fpr.R(a)); + VFNMADD132SD(XMM1, Rb.GetSimpleReg(), Ra); break; case 31: // nmadd if (packed) - VFNMSUB132PD(XMM1, fpr.RX(b), fpr.R(a)); + VFNMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra); else - VFNMSUB132SD(XMM1, fpr.RX(b), fpr.R(a)); + VFNMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra); break; } } @@ -315,15 +328,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst) { // We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)), so handle it // separately. - MOVAPD(XMM1, fpr.R(b)); + MOVAPD(XMM1, Rb); if (packed) { - MULPD(XMM0, fpr.R(a)); + MULPD(XMM0, Ra); SUBPD(XMM1, R(XMM0)); } else { - MULSD(XMM0, fpr.R(a)); + MULSD(XMM0, Ra); SUBSD(XMM1, R(XMM0)); } } @@ -331,36 +344,35 @@ void Jit64::fmaddXX(UGeckoInstruction inst) { if (packed) { - MULPD(XMM1, fpr.R(a)); + MULPD(XMM1, Ra); if (inst.SUBOP5 == 28) // msub - SUBPD(XMM1, fpr.R(b)); + SUBPD(XMM1, Rb); else //(n)madd(s[01]) - ADDPD(XMM1, fpr.R(b)); + ADDPD(XMM1, Rb); } else { - MULSD(XMM1, fpr.R(a)); + MULSD(XMM1, Ra); if (inst.SUBOP5 == 28) - SUBSD(XMM1, fpr.R(b)); + SUBSD(XMM1, Rb); else - ADDSD(XMM1, fpr.R(b)); + ADDSD(XMM1, Rb); } if (inst.SUBOP5 == 31) // nmadd XORPD(XMM1, MConst(packed ? 
psSignBits2 : psSignBits)); } - fpr.BindToRegister(d, !single); + if (single) { - HandleNaNs(inst, fpr.RX(d), XMM1); - ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true); + HandleNaNs(inst, Rd, XMM1); + ForceSinglePrecision(Rd, Rd, packed, true); } else { HandleNaNs(inst, XMM1, XMM1); - MOVSD(fpr.RX(d), R(XMM1)); + MOVSD(Rd, R(XMM1)); } - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); + SetFPRFIfNeeded(Rd); } void Jit64::fsign(UGeckoInstruction inst) @@ -373,29 +385,28 @@ void Jit64::fsign(UGeckoInstruction inst) int b = inst.FB; bool packed = inst.OPCD == 4; - fpr.Lock(b, d); - OpArg src = fpr.R(b); - fpr.BindToRegister(d, false); + RCOpArg src = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(src, Rd); switch (inst.SUBOP10) { case 40: // neg - avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src, - MConst(packed ? psSignBits2 : psSignBits), packed); + avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits), + packed); break; case 136: // nabs - avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, - MConst(packed ? psSignBits2 : psSignBits), packed); + avx_op(&XEmitter::VORPD, &XEmitter::ORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits), + packed); break; case 264: // abs - avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, - MConst(packed ? psAbsMask2 : psAbsMask), packed); + avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, Rd, src, MConst(packed ? psAbsMask2 : psAbsMask), + packed); break; default: PanicAlert("fsign bleh"); break; } - fpr.UnlockAll(); } void Jit64::fselx(UGeckoInstruction inst) @@ -411,35 +422,38 @@ void Jit64::fselx(UGeckoInstruction inst) bool packed = inst.OPCD == 4; // ps_sel - fpr.Lock(a, b, c, d); + RCOpArg Ra = fpr.Use(a, RCMode::Read); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCOpArg Rc = fpr.Use(c, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, packed ? RCMode::Write : RCMode::ReadWrite); + RegCache::Realize(Ra, Rb, Rc, Rd); + XORPD(XMM0, R(XMM0)); // This condition is very tricky; there's only one right way to handle both the case of // negative/positive zero and NaN properly. // (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE. if (packed) - CMPPD(XMM0, fpr.R(a), CMP_NLE); + CMPPD(XMM0, Ra, CMP_NLE); else - CMPSD(XMM0, fpr.R(a), CMP_NLE); + CMPSD(XMM0, Ra, CMP_NLE); if (cpu_info.bSSE4_1) { - MOVAPD(XMM1, fpr.R(c)); - BLENDVPD(XMM1, fpr.R(b)); + MOVAPD(XMM1, Rc); + BLENDVPD(XMM1, Rb); } else { MOVAPD(XMM1, R(XMM0)); - ANDPD(XMM0, fpr.R(b)); - ANDNPD(XMM1, fpr.R(c)); + ANDPD(XMM0, Rb); + ANDNPD(XMM1, Rc); ORPD(XMM1, R(XMM0)); } - fpr.BindToRegister(d, !packed); if (packed) - MOVAPD(fpr.RX(d), R(XMM1)); + MOVAPD(Rd, R(XMM1)); else - MOVSD(fpr.RX(d), R(XMM1)); - fpr.UnlockAll(); + MOVSD(Rd, R(XMM1)); } void Jit64::fmrx(UGeckoInstruction inst) @@ -454,26 +468,25 @@ void Jit64::fmrx(UGeckoInstruction inst) if (d == b) return; - fpr.Lock(b, d); - - if (fpr.R(d).IsSimpleReg()) + RCOpArg Rd = fpr.Use(d, RCMode::Write); + RegCache::Realize(Rd); + if (Rd.IsSimpleReg()) { - // We don't need to load d, but if it is loaded, we need to mark it as dirty. - fpr.BindToRegister(d); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RegCache::Realize(Rb); // We have to use MOVLPD if b isn't loaded because "MOVSD reg, mem" sets the upper bits (64+) // to zero and we don't want that. 
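
The upper-half behaviour that the comment above relies on, sketched with illustrative operands (mem64 stands in for any 64-bit memory OpArg):

```cpp
// Loading 64 bits from memory into an XMM register:
MOVSD(XMM0, mem64);    // low 64 bits = memory; bits 64-127 are zeroed
MOVLPD(XMM0, mem64);   // low 64 bits = memory; bits 64-127 are preserved
// Register-to-register MOVSD merges instead of zeroing:
MOVSD(XMM0, R(XMM1));  // low 64 bits = xmm1's low half; bits 64-127 preserved
```
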
- if (!fpr.R(b).IsSimpleReg()) - MOVLPD(fpr.RX(d), fpr.R(b)); + if (!Rb.IsSimpleReg()) + MOVLPD(Rd.GetSimpleReg(), Rb); else - MOVSD(fpr.R(d), fpr.RX(b)); + MOVSD(Rd, Rb.GetSimpleReg()); } else { - fpr.BindToRegister(b, true, false); - MOVSD(fpr.R(d), fpr.RX(b)); + RCOpArg Rb = fpr.Bind(b, RCMode::Read); + RegCache::Realize(Rb); + MOVSD(Rd, Rb.GetSimpleReg()); } - - fpr.UnlockAll(); } void Jit64::FloatCompare(UGeckoInstruction inst, bool upper) @@ -500,22 +513,22 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper) output[3 - (next.CRBB & 3)] |= 1 << dst; } - fpr.Lock(a, b); - fpr.BindToRegister(b, true, false); + RCOpArg Ra = upper ? fpr.Bind(a, RCMode::Read) : fpr.Use(a, RCMode::Read); + RCX64Reg Rb = fpr.Bind(b, RCMode::Read); + RegCache::Realize(Ra, Rb); if (fprf) AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); if (upper) { - fpr.BindToRegister(a, true, false); - MOVHLPS(XMM0, fpr.RX(a)); - MOVHLPS(XMM1, fpr.RX(b)); + MOVHLPS(XMM0, Ra.GetSimpleReg()); + MOVHLPS(XMM1, Rb); UCOMISD(XMM1, R(XMM0)); } else { - UCOMISD(fpr.RX(b), fpr.R(a)); + UCOMISD(Rb, Ra); } FixupBranch pNaN, pLesser, pGreater; @@ -572,7 +585,6 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper) } MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH)); - fpr.UnlockAll(); } void Jit64::fcmpX(UGeckoInstruction inst) @@ -591,8 +603,10 @@ void Jit64::fctiwx(UGeckoInstruction inst) int d = inst.RD; int b = inst.RB; - fpr.Lock(d, b); - fpr.BindToRegister(d); + + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Rb, Rd); // Intel uses 0x80000000 as a generic error code while PowerPC uses clamping: // @@ -606,7 +620,7 @@ void Jit64::fctiwx(UGeckoInstruction inst) // except for -0.0 where they are set to 0xfff80001 (TODO). MOVAPD(XMM0, MConst(half_qnan_and_s32_max)); - MINSD(XMM0, fpr.R(b)); + MINSD(XMM0, Rb); switch (inst.SUBOP10) { // fctiwx @@ -620,8 +634,7 @@ void Jit64::fctiwx(UGeckoInstruction inst) break; } // d[64+] must not be modified - MOVSD(fpr.R(d), XMM0); - fpr.UnlockAll(); + MOVSD(Rd, XMM0); } void Jit64::frspx(UGeckoInstruction inst) @@ -633,12 +646,12 @@ void Jit64::frspx(UGeckoInstruction inst) int d = inst.FD; bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom; - fpr.Lock(b, d); - OpArg src = fpr.R(b); - fpr.BindToRegister(d, false); - ForceSinglePrecision(fpr.RX(d), src, packed, true); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Rb, Rd); + + ForceSinglePrecision(Rd, Rb, packed, true); + SetFPRFIfNeeded(Rd); } void Jit64::frsqrtex(UGeckoInstruction inst) @@ -649,15 +662,15 @@ void Jit64::frsqrtex(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - gpr.FlushLockX(RSCRATCH_EXTRA); - fpr.Lock(b, d); - fpr.BindToRegister(d); - MOVAPD(XMM0, fpr.R(b)); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(scratch_guard, Rb, Rd); + + MOVAPD(XMM0, Rb); CALL(asm_routines.frsqrte); - MOVSD(fpr.R(d), XMM0); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); - gpr.UnlockAllX(); + MOVSD(Rd, XMM0); + SetFPRFIfNeeded(Rd); } void Jit64::fresx(UGeckoInstruction inst) @@ -668,13 +681,13 @@ void Jit64::fresx(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - gpr.FlushLockX(RSCRATCH_EXTRA); - fpr.Lock(b, d); - MOVAPD(XMM0, fpr.R(b)); - fpr.BindToRegister(d, false); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCOpArg Rb = 
fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(scratch_guard, Rb, Rd); + + MOVAPD(XMM0, Rb); CALL(asm_routines.fres); - MOVDDUP(fpr.RX(d), R(XMM0)); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); - gpr.UnlockAllX(); + MOVDDUP(Rd, R(XMM0)); + SetFPRFIfNeeded(Rd); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index f91443996b..f132959dcb 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -13,7 +13,7 @@ #include "Common/MathUtil.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" @@ -141,9 +141,11 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) // branches, only EQ. // The flags from any instruction that may set OF (such as ADD/SUB) can not be used for // LT/GT either. -void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext) +void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext) { - ASSERT_MSG(DYNA_REC, arg.IsSimpleReg() || arg.IsImm(), "Invalid ComputeRC operand"); + RCOpArg arg = gpr.Use(preg, RCMode::Read); + RegCache::Realize(arg); + if (arg.IsImm()) { MOV(64, PPCSTATE(cr_val[0]), Imm32(arg.SImm32())); @@ -157,17 +159,21 @@ void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext) { MOV(64, PPCSTATE(cr_val[0]), arg); } + if (CheckMergedBranch(0)) { if (arg.IsImm()) { - DoMergedBranchImmediate(arg.SImm32()); + s32 offset = arg.SImm32(); + arg.Unlock(); + DoMergedBranchImmediate(offset); } else { if (needs_test) { TEST(32, arg, arg); + arg.Unlock(); } else { @@ -175,27 +181,14 @@ void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext) // better to flush it here so that we don't have to flush it on both sides of the branch. // We don't want to do this if a test is needed though, because it would interrupt macro-op // fusion. - for (int j : ~js.op->gprInUse) - gpr.StoreFromRegister(j); + arg.Unlock(); + gpr.Flush(~js.op->gprInUse); } DoMergedBranchCondition(); } } } -OpArg Jit64::ExtractFromReg(int reg, int offset) -{ - OpArg src = gpr.R(reg); - // store to load forwarding should handle this case efficiently - if (offset) - { - gpr.StoreFromRegister(reg, RegCache::FlushMode::MaintainState); - src = gpr.GetDefaultLocation(reg); - src.AddMemOffset(offset); - } - return src; -} - // we can't do this optimization in the emitter because MOVZX and AND have different effects on // flags. void Jit64::AndWithMask(X64Reg reg, u32 mask) @@ -233,31 +226,28 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const OpArg&, const OpArg&), bool Rc, bool carry) { bool needs_test = doop == Add; - gpr.Lock(d, a); // Be careful; addic treats r0 as r0, but addi treats r0 as zero. 
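
The r0 rule the comment above refers to, written interpreter-style (hypothetical helper functions, for illustration only):

```cpp
#include "Common/CommonTypes.h"  // for Dolphin's u32/s32 typedefs

// addi: rA == 0 means the literal value 0, not GPR 0, which is what makes
// "li rD, imm" work, so the operand can be folded to an immediate.
u32 AddiResult(const u32* gpr_file, u32 ra, s32 simm)
{
  return (ra == 0 ? 0 : gpr_file[ra]) + simm;
}

// addic: rA == 0 really reads GPR 0, so the register read cannot be elided.
u32 AddicResult(const u32* gpr_file, u32 ra, s32 simm)
{
  return gpr_file[ra] + simm;
}
```
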
if (a || binary || carry) { carry &= js.op->wantsCA; - if (gpr.R(a).IsImm() && !carry) + if (gpr.IsImm(a) && !carry) { - gpr.SetImmediate32(d, doop(gpr.R(a).Imm32(), value)); - } - else if (a == d) - { - gpr.BindToRegister(d, true); - (this->*op)(32, gpr.R(d), Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; + gpr.SetImmediate32(d, doop(gpr.Imm32(a), value)); } else { - gpr.BindToRegister(d, false); - if (doop == Add && gpr.R(a).IsSimpleReg() && !carry) + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (doop == Add && Ra.IsSimpleReg() && !carry && d != a) { - LEA(32, gpr.RX(d), MDisp(gpr.RX(a), value)); + LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value)); } else { - MOV(32, gpr.R(d), gpr.R(a)); - (this->*op)(32, gpr.R(d), Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; + if (d != a) + MOV(32, Rd, Ra); + (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; } } if (carry) @@ -273,8 +263,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, ASSERT_MSG(DYNA_REC, 0, "WTF regimmop"); } if (Rc) - ComputeRC(gpr.R(d), needs_test, doop != And || (value & 0x80000000)); - gpr.UnlockAll(); + ComputeRC(d, needs_test, doop != And || (value & 0x80000000)); } void Jit64::reg_imm(UGeckoInstruction inst) @@ -286,16 +275,16 @@ void Jit64::reg_imm(UGeckoInstruction inst) { case 14: // addi // occasionally used as MOV - emulate, with immediate propagation - if (gpr.R(a).IsImm() && d != a && a != 0) + if (gpr.IsImm(a) && d != a && a != 0) { - gpr.SetImmediate32(d, gpr.R(a).Imm32() + (u32)(s32)inst.SIMM_16); + gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16); } else if (inst.SIMM_16 == 0 && d != a && a != 0) { - gpr.Lock(a, d); - gpr.BindToRegister(d, false, true); - MOV(32, gpr.R(d), gpr.R(a)); - gpr.UnlockAll(); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + MOV(32, Rd, Ra); } else { @@ -416,8 +405,8 @@ void Jit64::DoMergedBranchCondition() bool condition = !!(next.BO & BO_BRANCH_IF_TRUE); const u32 nextPC = js.op[1].address; - gpr.UnlockAll(); - gpr.UnlockAllX(); + ASSERT(gpr.IsAllUnlocked()); + FixupBranch pDontBranch; if (test_bit & 8) pDontBranch = J_CC(condition ? CC_GE : CC_L, true); // Test < 0, so jump over if >= 0. @@ -428,10 +417,15 @@ void Jit64::DoMergedBranchCondition() else // SO bit, do not branch (we don't emulate SO for cmp). pDontBranch = J(true); - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); - DoMergedBranch(); + gpr.Flush(); + fpr.Flush(); + + DoMergedBranch(); + } SetJumpTarget(pDontBranch); @@ -452,8 +446,8 @@ void Jit64::DoMergedBranchImmediate(s64 val) bool condition = !!(next.BO & BO_BRANCH_IF_TRUE); const u32 nextPC = js.op[1].address; - gpr.UnlockAll(); - gpr.UnlockAllX(); + ASSERT(gpr.IsAllUnlocked()); + bool branch; if (test_bit & 8) branch = condition ? 
val < 0 : val >= 0; @@ -488,42 +482,39 @@ void Jit64::cmpXX(UGeckoInstruction inst) u32 crf = inst.CRFD; bool merge_branch = CheckMergedBranch(crf); - OpArg comparand; bool signedCompare; - if (inst.OPCD == 31) + RCOpArg comparand; + switch (inst.OPCD) { - // cmp / cmpl - gpr.Lock(a, b); - comparand = gpr.R(b); + // cmp / cmpl + case 31: signedCompare = (inst.SUBOP10 == 0); - } - else - { - gpr.Lock(a); - if (inst.OPCD == 10) - { - // cmpli - comparand = Imm32((u32)inst.UIMM); - signedCompare = false; - } - else if (inst.OPCD == 11) - { - // cmpi - comparand = Imm32((u32)(s32)(s16)inst.UIMM); - signedCompare = true; - } - else - { - signedCompare = false; // silence compiler warning - PanicAlert("cmpXX"); - } + comparand = signedCompare ? gpr.Use(b, RCMode::Read) : gpr.Bind(b, RCMode::Read); + RegCache::Realize(comparand); + break; + + // cmpli + case 10: + signedCompare = false; + comparand = RCOpArg::Imm32((u32)inst.UIMM); + break; + + // cmpi + case 11: + signedCompare = true; + comparand = RCOpArg::Imm32((u32)(s32)(s16)inst.UIMM); + break; + + default: + signedCompare = false; // silence compiler warning + PanicAlert("cmpXX"); } - if (gpr.R(a).IsImm() && comparand.IsImm()) + if (gpr.IsImm(a) && comparand.IsImm()) { // Both registers contain immediate values, so we can pre-compile the compare result - s64 compareResult = signedCompare ? (s64)gpr.R(a).SImm32() - (s64)comparand.SImm32() : - (u64)gpr.R(a).Imm32() - (u64)comparand.Imm32(); + s64 compareResult = signedCompare ? (s64)gpr.SImm32(a) - (s64)comparand.SImm32() : + (u64)gpr.Imm32(a) - (u64)comparand.Imm32(); if (compareResult == (s32)compareResult) { MOV(64, PPCSTATE(cr_val[crf]), Imm32((u32)compareResult)); @@ -535,73 +526,83 @@ void Jit64::cmpXX(UGeckoInstruction inst) } if (merge_branch) + { + RegCache::Unlock(comparand); DoMergedBranchImmediate(compareResult); + } + + return; + } + + if (!gpr.IsImm(a) && !signedCompare && comparand.IsImm() && comparand.Imm32() == 0) + { + RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RegCache::Realize(Ra); + + MOV(64, PPCSTATE(cr_val[crf]), Ra); + if (merge_branch) + { + TEST(64, Ra, Ra); + RegCache::Unlock(comparand, Ra); + DoMergedBranchCondition(); + } + return; + } + + const X64Reg input = RSCRATCH; + if (gpr.IsImm(a)) + { + if (signedCompare) + MOV(64, R(input), Imm32(gpr.SImm32(a))); + else + MOV(32, R(input), Imm32(gpr.Imm32(a))); } else { - X64Reg input = RSCRATCH; + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RegCache::Realize(Ra); if (signedCompare) - { - if (gpr.R(a).IsImm()) - MOV(64, R(input), Imm32(gpr.R(a).SImm32())); - else - MOVSX(64, 32, input, gpr.R(a)); - - if (!comparand.IsImm()) - { - MOVSX(64, 32, RSCRATCH2, comparand); - comparand = R(RSCRATCH2); - } - } + MOVSX(64, 32, input, Ra); else - { - if (gpr.R(a).IsImm()) - { - MOV(32, R(input), Imm32(gpr.R(a).Imm32())); - } - else if (comparand.IsImm() && !comparand.Imm32()) - { - gpr.BindToRegister(a, true, false); - input = gpr.RX(a); - } - else - { - MOVZX(64, 32, input, gpr.R(a)); - } - - if (comparand.IsImm()) - { - // sign extension will ruin this, so store it in a register - if (comparand.Imm32() & 0x80000000U) - { - MOV(32, R(RSCRATCH2), comparand); - comparand = R(RSCRATCH2); - } - } - else - { - gpr.BindToRegister(b, true, false); - comparand = gpr.R(b); - } - } - if (comparand.IsImm() && !comparand.Imm32()) - { - MOV(64, PPCSTATE(cr_val[crf]), R(input)); - // Place the comparison next to the branch for macro-op fusion - if (merge_branch) - TEST(64, R(input), R(input)); - } - else - { - SUB(64, R(input), comparand); - 
MOV(64, PPCSTATE(cr_val[crf]), R(input)); - } - - if (merge_branch) - DoMergedBranchCondition(); + MOVZX(64, 32, input, Ra); } - gpr.UnlockAll(); + if (comparand.IsImm()) + { + // sign extension will ruin this, so store it in a register + if (!signedCompare && (comparand.Imm32() & 0x80000000U) != 0) + { + MOV(32, R(RSCRATCH2), comparand); + comparand = RCOpArg::R(RSCRATCH2); + } + } + else + { + if (signedCompare) + { + MOVSX(64, 32, RSCRATCH2, comparand); + comparand = RCOpArg::R(RSCRATCH2); + } + } + + if (comparand.IsImm() && comparand.Imm32() == 0) + { + MOV(64, PPCSTATE(cr_val[crf]), R(input)); + // Place the comparison next to the branch for macro-op fusion + if (merge_branch) + TEST(64, R(input), R(input)); + } + else + { + SUB(64, R(input), comparand); + MOV(64, PPCSTATE(cr_val[crf]), R(input)); + } + + if (merge_branch) + { + RegCache::Unlock(comparand); + DoMergedBranchCondition(); + } } void Jit64::boolX(UGeckoInstruction inst) @@ -612,10 +613,10 @@ void Jit64::boolX(UGeckoInstruction inst) bool needs_test = false; DEBUG_ASSERT_MSG(DYNA_REC, inst.OPCD == 31, "Invalid boolX"); - if (gpr.R(s).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(s, b)) { - const u32 rs_offset = gpr.R(s).Imm32(); - const u32 rb_offset = gpr.R(b).Imm32(); + const u32 rs_offset = gpr.Imm32(s); + const u32 rb_offset = gpr.Imm32(b); if (inst.SUBOP10 == 28) // andx gpr.SetImmediate32(a, rs_offset & rb_offset); @@ -640,33 +641,33 @@ void Jit64::boolX(UGeckoInstruction inst) { if (a != s) { - gpr.Lock(a, s); - gpr.BindToRegister(a, false, true); - MOV(32, gpr.R(a), gpr.R(s)); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + MOV(32, Ra, Rs); } else if (inst.Rc) { - gpr.BindToRegister(a, true, false); + gpr.Bind(a, RCMode::Read).Realize(); } needs_test = true; } else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */)) { - if (a != s) + if (a == s && !inst.Rc) { - gpr.Lock(a, s); - gpr.BindToRegister(a, false, true); - MOV(32, gpr.R(a), gpr.R(s)); - } - else if (inst.Rc) - { - gpr.BindToRegister(a, true, true); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite); + RegCache::Realize(Ra); + NOT(32, Ra); } else { - gpr.KillImmediate(a, true, true); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + MOV(32, Ra, Rs); + NOT(32, Ra); } - NOT(32, gpr.R(a)); needs_test = true; } else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */)) @@ -684,70 +685,72 @@ void Jit64::boolX(UGeckoInstruction inst) } else if ((a == s) || (a == b)) { - gpr.Lock(a, ((a == s) ? b : s)); - OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s)); - gpr.BindToRegister(a, true, true); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCOpArg operand = gpr.Use(a == s ? 
b : s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rb, Rs, operand, Ra); if (inst.SUBOP10 == 28) // andx { - AND(32, gpr.R(a), operand); + AND(32, Ra, operand); } else if (inst.SUBOP10 == 476) // nandx { - AND(32, gpr.R(a), operand); - NOT(32, gpr.R(a)); + AND(32, Ra, operand); + NOT(32, Ra); needs_test = true; } else if (inst.SUBOP10 == 60) // andcx { - if (cpu_info.bBMI1 && gpr.R(b).IsSimpleReg() && !gpr.R(s).IsImm()) + if (cpu_info.bBMI1 && Rb.IsSimpleReg() && !Rs.IsImm()) { - ANDN(32, gpr.RX(a), gpr.RX(b), gpr.R(s)); + ANDN(32, Ra, Rb.GetSimpleReg(), Rs); } else if (a == b) { - NOT(32, gpr.R(a)); - AND(32, gpr.R(a), operand); + NOT(32, Ra); + AND(32, Ra, operand); } else { MOV(32, R(RSCRATCH), operand); NOT(32, R(RSCRATCH)); - AND(32, gpr.R(a), R(RSCRATCH)); + AND(32, Ra, R(RSCRATCH)); } } else if (inst.SUBOP10 == 444) // orx { - OR(32, gpr.R(a), operand); + OR(32, Ra, operand); } else if (inst.SUBOP10 == 124) // norx { - OR(32, gpr.R(a), operand); - NOT(32, gpr.R(a)); + OR(32, Ra, operand); + NOT(32, Ra); needs_test = true; } else if (inst.SUBOP10 == 412) // orcx { if (a == b) { - NOT(32, gpr.R(a)); - OR(32, gpr.R(a), operand); + NOT(32, Ra); + OR(32, Ra, operand); } else { MOV(32, R(RSCRATCH), operand); NOT(32, R(RSCRATCH)); - OR(32, gpr.R(a), R(RSCRATCH)); + OR(32, Ra, R(RSCRATCH)); } } else if (inst.SUBOP10 == 316) // xorx { - XOR(32, gpr.R(a), operand); + XOR(32, Ra, operand); } else if (inst.SUBOP10 == 284) // eqvx { - NOT(32, gpr.R(a)); - XOR(32, gpr.R(a), operand); + NOT(32, Ra); + XOR(32, Ra, operand); } else { @@ -756,62 +759,64 @@ void Jit64::boolX(UGeckoInstruction inst) } else { - gpr.Lock(a, s, b); - gpr.BindToRegister(a, false, true); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rb, Rs, Ra); if (inst.SUBOP10 == 28) // andx { - MOV(32, gpr.R(a), gpr.R(s)); - AND(32, gpr.R(a), gpr.R(b)); + MOV(32, Ra, Rs); + AND(32, Ra, Rb); } else if (inst.SUBOP10 == 476) // nandx { - MOV(32, gpr.R(a), gpr.R(s)); - AND(32, gpr.R(a), gpr.R(b)); - NOT(32, gpr.R(a)); + MOV(32, Ra, Rs); + AND(32, Ra, Rb); + NOT(32, Ra); needs_test = true; } else if (inst.SUBOP10 == 60) // andcx { - if (cpu_info.bBMI1 && gpr.R(b).IsSimpleReg() && !gpr.R(s).IsImm()) + if (cpu_info.bBMI1 && Rb.IsSimpleReg() && !Rs.IsImm()) { - ANDN(32, gpr.RX(a), gpr.RX(b), gpr.R(s)); + ANDN(32, Ra, Rb.GetSimpleReg(), Rs); } else { - MOV(32, gpr.R(a), gpr.R(b)); - NOT(32, gpr.R(a)); - AND(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rb); + NOT(32, Ra); + AND(32, Ra, Rs); } } else if (inst.SUBOP10 == 444) // orx { - MOV(32, gpr.R(a), gpr.R(s)); - OR(32, gpr.R(a), gpr.R(b)); + MOV(32, Ra, Rs); + OR(32, Ra, Rb); } else if (inst.SUBOP10 == 124) // norx { - MOV(32, gpr.R(a), gpr.R(s)); - OR(32, gpr.R(a), gpr.R(b)); - NOT(32, gpr.R(a)); + MOV(32, Ra, Rs); + OR(32, Ra, Rb); + NOT(32, Ra); needs_test = true; } else if (inst.SUBOP10 == 412) // orcx { - MOV(32, gpr.R(a), gpr.R(b)); - NOT(32, gpr.R(a)); - OR(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rb); + NOT(32, Ra); + OR(32, Ra, Rs); } else if (inst.SUBOP10 == 316) // xorx { - MOV(32, gpr.R(a), gpr.R(s)); - XOR(32, gpr.R(a), gpr.R(b)); + MOV(32, Ra, Rs); + XOR(32, Ra, Rb); } else if (inst.SUBOP10 == 284) // eqvx { - MOV(32, gpr.R(a), gpr.R(s)); - NOT(32, gpr.R(a)); - XOR(32, gpr.R(a), gpr.R(b)); + MOV(32, Ra, Rs); + NOT(32, Ra); + XOR(32, Ra, Rb); } else { @@ -819,8 +824,7 @@ void Jit64::boolX(UGeckoInstruction inst) } } if (inst.Rc) - ComputeRC(gpr.R(a), 
needs_test); - gpr.UnlockAll(); + ComputeRC(a, needs_test); } void Jit64::extsXx(UGeckoInstruction inst) @@ -830,19 +834,19 @@ void Jit64::extsXx(UGeckoInstruction inst) int a = inst.RA, s = inst.RS; int size = inst.SUBOP10 == 922 ? 16 : 8; - if (gpr.R(s).IsImm()) + if (gpr.IsImm(s)) { - gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.R(s).Imm32() : (s8)gpr.R(s).Imm32())); + gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.Imm32(s) : (s8)gpr.Imm32(s))); } else { - gpr.Lock(a, s); - gpr.BindToRegister(a, a == s, true); - MOVSX(32, size, gpr.RX(a), gpr.R(s)); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + MOVSX(32, size, Ra, Rs); } if (inst.Rc) - ComputeRC(gpr.R(a)); - gpr.UnlockAll(); + ComputeRC(a); } void Jit64::subfic(UGeckoInstruction inst) @@ -850,40 +854,42 @@ void Jit64::subfic(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITIntegerOff); int a = inst.RA, d = inst.RD; - gpr.Lock(a, d); - gpr.BindToRegister(d, a == d, true); + + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + int imm = inst.SIMM_16; if (d == a) { if (imm == 0) { // Flags act exactly like subtracting from 0 - NEG(32, gpr.R(d)); + NEG(32, Rd); // Output carry is inverted FinalizeCarry(CC_NC); } else if (imm == -1) { - NOT(32, gpr.R(d)); + NOT(32, Rd); // CA is always set in this case FinalizeCarry(true); } else { - NOT(32, gpr.R(d)); - ADD(32, gpr.R(d), Imm32(imm + 1)); + NOT(32, Rd); + ADD(32, Rd, Imm32(imm + 1)); // Output carry is normal FinalizeCarry(CC_C); } } else { - MOV(32, gpr.R(d), Imm32(imm)); - SUB(32, gpr.R(d), gpr.R(a)); + MOV(32, Rd, Imm32(imm)); + SUB(32, Rd, Ra); // Output carry is inverted FinalizeCarry(CC_NC); } - gpr.UnlockAll(); // This instruction has no RC flag } @@ -893,54 +899,60 @@ void Jit64::subfx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a) && gpr.IsImm(b)) { - s32 i = gpr.R(b).SImm32(), j = gpr.R(a).SImm32(); + s32 i = gpr.SImm32(b), j = gpr.SImm32(a); gpr.SetImmediate32(d, i - j); if (inst.OE) GenerateConstantOverflow((s64)i - (s64)j); } else { - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); + if (d == b) { - SUB(32, gpr.R(d), gpr.R(a)); + SUB(32, Rd, Ra); } else if (d == a) { - MOV(32, R(RSCRATCH), gpr.R(a)); - MOV(32, gpr.R(d), gpr.R(b)); - SUB(32, gpr.R(d), R(RSCRATCH)); + MOV(32, R(RSCRATCH), Ra); + MOV(32, Rd, Rb); + SUB(32, Rd, R(RSCRATCH)); } else { - MOV(32, gpr.R(d), gpr.R(b)); - SUB(32, gpr.R(d), gpr.R(a)); + MOV(32, Rd, Rb); + SUB(32, Rd, Ra); } if (inst.OE) GenerateOverflow(); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); + ComputeRC(d); } void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) { + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + // simplest cases first if (imm == 0) { - XOR(32, gpr.R(d), gpr.R(d)); + XOR(32, Rd, Rd); return; } if (imm == (u32)-1) { if (d != a) - MOV(32, gpr.R(d), gpr.R(a)); - NEG(32, gpr.R(d)); + MOV(32, Rd, Ra); + NEG(32, Rd); return; } @@ -952,16 +964,16 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) { u32 shift = IntLog2(imm); // use LEA if it saves an op - if (d != a && shift <= 3 && 
shift >= 1 && gpr.R(a).IsSimpleReg()) + if (d != a && shift <= 3 && shift >= 1 && Ra.IsSimpleReg()) { - LEA(32, gpr.RX(d), MScaled(gpr.RX(a), SCALE_1 << shift, 0)); + LEA(32, Rd, MScaled(Ra.GetSimpleReg(), SCALE_1 << shift, 0)); } else { if (d != a) - MOV(32, gpr.R(d), gpr.R(a)); + MOV(32, Rd, Ra); if (shift) - SHL(32, gpr.R(d), Imm8(shift)); + SHL(32, Rd, Imm8(shift)); } return; } @@ -971,18 +983,16 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) static constexpr std::array<u8, 3> lea_scales{{3, 5, 9}}; for (size_t i = 0; i < lea_scales.size(); i++) { - if (imm == lea_scales[i]) + if (imm == lea_scales[i] && Ra.IsSimpleReg()) { - if (d != a) - gpr.BindToRegister(a, true, false); - LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(a), SCALE_2 << i, 0)); + LEA(32, Rd, MComplex(Ra.GetSimpleReg(), Ra.GetSimpleReg(), SCALE_2 << i, 0)); return; } } } // if we didn't find any better options - IMUL(32, gpr.RX(d), gpr.R(a), Imm32(imm)); + IMUL(32, Rd, Ra, Imm32(imm)); } void Jit64::mulli(UGeckoInstruction inst) @@ -992,16 +1002,13 @@ void Jit64::mulli(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; u32 imm = inst.SIMM_16; - if (gpr.R(a).IsImm()) + if (gpr.IsImm(a)) { - gpr.SetImmediate32(d, gpr.R(a).Imm32() * imm); + gpr.SetImmediate32(d, gpr.Imm32(a) * imm); } else { - gpr.Lock(a, d); - gpr.BindToRegister(d, (d == a), true); MultiplyImmediate(imm, a, d, false); - gpr.UnlockAll(); } } @@ -1011,42 +1018,46 @@ void Jit64::mullwx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a, b)) { - s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32(); + s32 i = gpr.SImm32(a), j = gpr.SImm32(b); gpr.SetImmediate32(d, i * j); if (inst.OE) GenerateConstantOverflow((s64)i * (s64)j); } + else if (gpr.IsImm(a) || gpr.IsImm(b)) + { + u32 imm = gpr.IsImm(a) ? gpr.Imm32(a) : gpr.Imm32(b); + int src = gpr.IsImm(a) ? b : a; + MultiplyImmediate(imm, src, d, inst.OE); + if (inst.OE) + GenerateOverflow(); + } else { - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); - if (gpr.R(a).IsImm() || gpr.R(b).IsImm()) + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); + + if (d == a) { - u32 imm = gpr.R(a).IsImm() ? gpr.R(a).Imm32() : gpr.R(b).Imm32(); - int src = gpr.R(a).IsImm() ?
b : a; - MultiplyImmediate(imm, src, d, inst.OE); - } - else if (d == a) - { - IMUL(32, gpr.RX(d), gpr.R(b)); + IMUL(32, Rd, Rb); } else if (d == b) { - IMUL(32, gpr.RX(d), gpr.R(a)); + IMUL(32, Rd, Ra); } else { - MOV(32, gpr.R(d), gpr.R(b)); - IMUL(32, gpr.RX(d), gpr.R(a)); + MOV(32, Rd, Rb); + IMUL(32, Rd, Ra); } if (inst.OE) GenerateOverflow(); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); + ComputeRC(d); } void Jit64::mulhwXx(UGeckoInstruction inst) @@ -1056,41 +1067,45 @@ void Jit64::mulhwXx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; bool sign = inst.SUBOP10 == 75; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a, b)) { if (sign) - gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.R(a).SImm32() * (s64)gpr.R(b).SImm32())) >> 32)); + gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.SImm32(a) * (s64)gpr.SImm32(b))) >> 32)); else - gpr.SetImmediate32(d, (u32)(((u64)gpr.R(a).Imm32() * (u64)gpr.R(b).Imm32()) >> 32)); + gpr.SetImmediate32(d, (u32)(((u64)gpr.Imm32(a) * (u64)gpr.Imm32(b)) >> 32)); } else if (sign) { - gpr.Lock(a, b, d); - // no register choice - gpr.FlushLockX(EDX, EAX); - gpr.BindToRegister(d, d == a || d == b, true); - MOV(32, R(EAX), gpr.R(a)); - gpr.KillImmediate(b, true, false); - IMUL(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EDX)); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.UseNoImm(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RCX64Reg eax = gpr.Scratch(EAX); + RCX64Reg edx = gpr.Scratch(EDX); + RegCache::Realize(Ra, Rb, Rd, eax, edx); + + MOV(32, eax, Ra); + IMUL(32, Rb); + MOV(32, Rd, edx); } else { // Not faster for signed because we'd need two movsx. - gpr.Lock(a, b, d); // We need to bind everything to registers since the top 32 bits need to be zero. int src = d == b ? a : b; - gpr.BindToRegister(d, d == a || d == b, true); - gpr.BindToRegister(src, true, false); - if (d != a && d != b) - MOV(32, gpr.R(d), gpr.R(a)); - IMUL(64, gpr.RX(d), gpr.R(src)); - SHR(64, gpr.R(d), Imm8(32)); + int other = src == b ? 
a : b; + + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RCX64Reg Rsrc = gpr.Bind(src, RCMode::Read); + RCOpArg Rother = gpr.Use(other, RCMode::Read); + RegCache::Realize(Rd, Rsrc, Rother); + + if (other != d) + MOV(32, Rd, Rother); + IMUL(64, Rd, Rsrc); + SHR(64, Rd, Imm8(32)); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(d); } void Jit64::divwux(UGeckoInstruction inst) @@ -1099,9 +1114,9 @@ void Jit64::divwux(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a, b)) { - if (gpr.R(b).Imm32() == 0) + if (gpr.Imm32(b) == 0) { gpr.SetImmediate32(d, 0); if (inst.OE) @@ -1109,14 +1124,14 @@ void Jit64::divwux(UGeckoInstruction inst) } else { - gpr.SetImmediate32(d, gpr.R(a).Imm32() / gpr.R(b).Imm32()); + gpr.SetImmediate32(d, gpr.Imm32(a) / gpr.Imm32(b)); if (inst.OE) GenerateConstantOverflow(false); } } - else if (gpr.R(b).IsImm()) + else if (gpr.IsImm(b)) { - u32 divisor = gpr.R(b).Imm32(); + u32 divisor = gpr.Imm32(b); if (divisor == 0) { gpr.SetImmediate32(d, 0); @@ -1131,12 +1146,14 @@ void Jit64::divwux(UGeckoInstruction inst) if (divisor == (u32)(1 << shift)) { - gpr.Lock(a, b, d); - gpr.BindToRegister(d, d == a, true); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (d != a) - MOV(32, gpr.R(d), gpr.R(a)); + MOV(32, Rd, Ra); if (shift) - SHR(32, gpr.R(d), Imm8(shift)); + SHR(32, Rd, Imm8(shift)); } else { @@ -1148,32 +1165,35 @@ void Jit64::divwux(UGeckoInstruction inst) if (((u64)(magic + 1) * (max_quotient * divisor - 1)) >> (shift + 32) != max_quotient - 1) { // If failed, use slower round-down method - gpr.Lock(a, b, d); - gpr.BindToRegister(d, d == a, true); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + MOV(32, R(RSCRATCH), Imm32(magic)); if (d != a) - MOV(32, gpr.R(d), gpr.R(a)); - IMUL(64, gpr.RX(d), R(RSCRATCH)); - ADD(64, gpr.R(d), R(RSCRATCH)); - SHR(64, gpr.R(d), Imm8(shift + 32)); + MOV(32, Rd, Ra); + IMUL(64, Rd, R(RSCRATCH)); + ADD(64, Rd, R(RSCRATCH)); + SHR(64, Rd, Imm8(shift + 32)); } else { // If success, use faster round-up method - gpr.Lock(a, b, d); - gpr.BindToRegister(a, true, false); - gpr.BindToRegister(d, false, true); + RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (d == a) { MOV(32, R(RSCRATCH), Imm32(magic + 1)); - IMUL(64, gpr.RX(d), R(RSCRATCH)); + IMUL(64, Rd, R(RSCRATCH)); } else { - MOV(32, gpr.R(d), Imm32(magic + 1)); - IMUL(64, gpr.RX(d), gpr.R(a)); + MOV(32, Rd, Imm32(magic + 1)); + IMUL(64, Rd, Ra); } - SHR(64, gpr.R(d), Imm8(shift + 32)); + SHR(64, Rd, Imm8(shift + 32)); } } if (inst.OE) @@ -1182,24 +1202,27 @@ void Jit64::divwux(UGeckoInstruction inst) } else { - gpr.Lock(a, b, d); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rb = gpr.Bind(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); // no register choice (do we need to do this?) 
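Note: the "no register choice" comment above is a constraint of x86 itself, not of the register cache: DIV takes its 64-bit dividend in EDX:EAX and writes the quotient to EAX and the remainder to EDX, so the general divwux path has no say in where these values live and must claim both registers, which is what the Scratch(EAX)/Scratch(EDX) calls below do. The constant-divisor path above instead strength-reduces the division to a multiply by a precomputed reciprocal. A standalone C++ sketch of the exactness test it performs, assuming magic and shift are the reciprocal constants computed earlier in divwux (the max_quotient derivation here is illustrative):

#include <cstdint>

// True when the faster "round-up" form, (x * (magic + 1)) >> (shift + 32),
// reproduces x / divisor at the worst-case probe; this mirrors the condition
// tested in the hunk above. When the probe fails, the JIT emits the slower
// "round-down" multiply/add/shift sequence instead.
bool MagicRoundUpIsExact(uint32_t divisor, uint32_t magic, uint32_t shift)
{
  const uint64_t max_quotient = 0xFFFFFFFFull / divisor;  // largest possible quotient
  const uint64_t probe = max_quotient * divisor - 1;      // largest x whose quotient is max_quotient - 1
  return ((uint64_t)(magic + 1) * probe) >> (shift + 32) == max_quotient - 1;
}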
- gpr.FlushLockX(EAX, EDX); - gpr.BindToRegister(d, (d == a || d == b), true); - MOV(32, R(EAX), gpr.R(a)); - XOR(32, R(EDX), R(EDX)); - gpr.KillImmediate(b, true, false); - CMP_or_TEST(32, gpr.R(b), Imm32(0)); + RCX64Reg eax = gpr.Scratch(EAX); + RCX64Reg edx = gpr.Scratch(EDX); + RegCache::Realize(Ra, Rb, Rd, eax, edx); + + MOV(32, eax, Ra); + XOR(32, edx, edx); + TEST(32, Rb, Rb); FixupBranch not_div_by_zero = J_CC(CC_NZ); - MOV(32, gpr.R(d), R(EDX)); + MOV(32, Rd, edx); if (inst.OE) { GenerateConstantOverflow(true); } FixupBranch end = J(); SetJumpTarget(not_div_by_zero); - DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); + DIV(32, Rb); + MOV(32, Rd, eax); if (inst.OE) { GenerateConstantOverflow(false); @@ -1207,9 +1230,7 @@ void Jit64::divwux(UGeckoInstruction inst) SetJumpTarget(end); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(d); } void Jit64::divwx(UGeckoInstruction inst) @@ -1218,9 +1239,9 @@ void Jit64::divwx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a, b)) { - s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32(); + s32 i = gpr.SImm32(a), j = gpr.SImm32(b); if (j == 0 || (i == (s32)0x80000000 && j == -1)) { const u32 result = i < 0 ? 0xFFFFFFFF : 0x00000000; @@ -1237,25 +1258,27 @@ void Jit64::divwx(UGeckoInstruction inst) } else { - gpr.Lock(a, b, d); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rb = gpr.Bind(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); // no register choice - gpr.FlushLockX(EAX, EDX); - gpr.BindToRegister(d, (d == a || d == b), true); - MOV(32, R(EAX), gpr.R(a)); - gpr.BindToRegister(b, true, false); + RCX64Reg eax = gpr.Scratch(EAX); + RCX64Reg edx = gpr.Scratch(EDX); + RegCache::Realize(Ra, Rb, Rd, eax, edx); - TEST(32, gpr.R(b), gpr.R(b)); + MOV(32, eax, Ra); + TEST(32, Rb, Rb); const FixupBranch overflow = J_CC(CC_E); - CMP(32, R(EAX), Imm32(0x80000000)); + CMP(32, eax, Imm32(0x80000000)); const FixupBranch normal_path1 = J_CC(CC_NE); - CMP(32, gpr.R(b), Imm32(0xFFFFFFFF)); + CMP(32, Rb, Imm32(0xFFFFFFFF)); const FixupBranch normal_path2 = J_CC(CC_NE); SetJumpTarget(overflow); - SAR(32, R(EAX), Imm8(31)); - MOV(32, gpr.R(d), R(EAX)); + SAR(32, eax, Imm8(31)); + MOV(32, Rd, eax); if (inst.OE) { GenerateConstantOverflow(true); @@ -1266,8 +1289,8 @@ void Jit64::divwx(UGeckoInstruction inst) SetJumpTarget(normal_path2); CDQ(); - IDIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); + IDIV(32, Rb); + MOV(32, Rd, eax); if (inst.OE) { GenerateConstantOverflow(false); @@ -1275,9 +1298,7 @@ void Jit64::divwx(UGeckoInstruction inst) SetJumpTarget(done); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(d); } void Jit64::addx(UGeckoInstruction inst) @@ -1286,40 +1307,39 @@ void Jit64::addx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + if (gpr.IsImm(a, b)) { - s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32(); + s32 i = gpr.SImm32(a), j = gpr.SImm32(b); gpr.SetImmediate32(d, i + j); if (inst.OE) GenerateConstantOverflow((s64)i + (s64)j); } - else if ((d == a) || (d == b)) - { - int operand = ((d == a) ? 
b : a); - gpr.Lock(a, b, d); - gpr.BindToRegister(d, true); - ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.OE) - GenerateOverflow(); - } - else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && !inst.OE) - { - gpr.Lock(a, b, d); - gpr.BindToRegister(d, false); - LEA(32, gpr.RX(d), MRegSum(gpr.RX(a), gpr.RX(b))); - } else { - gpr.Lock(a, b, d); - gpr.BindToRegister(d, false); - MOV(32, gpr.R(d), gpr.R(a)); - ADD(32, gpr.R(d), gpr.R(b)); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); + + if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE) + { + LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg())); + } + else if (d == b) + { + ADD(32, Rd, Ra); + } + else + { + if (d != a) + MOV(32, Rd, Ra); + ADD(32, Rd, Rb); + } if (inst.OE) GenerateOverflow(); } if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); + ComputeRC(d); } void Jit64::arithXex(UGeckoInstruction inst) @@ -1334,8 +1354,6 @@ void Jit64::arithXex(UGeckoInstruction inst) int d = inst.RD; bool same_input_sub = !add && regsource && a == b; - gpr.Lock(a, b, d); - gpr.BindToRegister(d, !same_input_sub && (d == a || d == b)); if (!js.carryFlagSet) JitGetAndClearCAOV(inst.OE); else @@ -1345,45 +1363,56 @@ void Jit64::arithXex(UGeckoInstruction inst) // Special case: subfe A, B, B is a common compiler idiom if (same_input_sub) { + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); + // Convert carry to borrow if (!js.carryFlagInverted) CMC(); - SBB(32, gpr.R(d), gpr.R(d)); + SBB(32, Rd, Rd); invertedCarry = true; } else if (!add && regsource && d == b) { + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::ReadWrite); + RegCache::Realize(Ra, Rd); + if (!js.carryFlagInverted) CMC(); - SBB(32, gpr.R(d), gpr.R(a)); + SBB(32, Rd, Ra); invertedCarry = true; } else { - OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RCOpArg source = + regsource ? gpr.Use(d == b ? a : b, RCMode::Read) : RCOpArg::Imm32(mex ? 
0xFFFFFFFF : 0); + RegCache::Realize(Ra, Rb, Rd, source); + if (d != a && d != b) - MOV(32, gpr.R(d), gpr.R(a)); + MOV(32, Rd, Ra); if (!add) - NOT(32, gpr.R(d)); + NOT(32, Rd); // if the source is an immediate, we can invert carry by going from add -> sub and doing src = // -1 - src if (js.carryFlagInverted && source.IsImm()) { - source = Imm32(-1 - source.SImm32()); - SBB(32, gpr.R(d), source); + SBB(32, Rd, Imm32(-1 - source.SImm32())); invertedCarry = true; } else { if (js.carryFlagInverted) CMC(); - ADC(32, gpr.R(d), source); + ADC(32, Rd, source); } } FinalizeCarryOverflow(inst.OE, invertedCarry); if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); + ComputeRC(d); } void Jit64::arithcx(UGeckoInstruction inst) @@ -1392,37 +1421,41 @@ void Jit64::arithcx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); bool add = !!(inst.SUBOP10 & 2); // add or sub int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.BindToRegister(d, d == a || d == b, true); - if (d == a && d != b) { - if (add) + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); + + if (d == a && d != b) { - ADD(32, gpr.R(d), gpr.R(b)); + if (add) + { + ADD(32, Rd, Rb); + } + else + { + // special case, because sub isn't reversible + MOV(32, R(RSCRATCH), Ra); + MOV(32, Rd, Rb); + SUB(32, Rd, R(RSCRATCH)); + } } else { - // special case, because sub isn't reversible - MOV(32, R(RSCRATCH), gpr.R(a)); - MOV(32, gpr.R(d), gpr.R(b)); - SUB(32, gpr.R(d), R(RSCRATCH)); + if (d != b) + MOV(32, Rd, Rb); + if (add) + ADD(32, Rd, Ra); + else + SUB(32, Rd, Ra); } } - else - { - if (d != b) - MOV(32, gpr.R(d), gpr.R(b)); - if (add) - ADD(32, gpr.R(d), gpr.R(a)); - else - SUB(32, gpr.R(d), gpr.R(a)); - } FinalizeCarryOverflow(inst.OE, !add); if (inst.Rc) - ComputeRC(gpr.R(d)); - gpr.UnlockAll(); + ComputeRC(d); } void Jit64::rlwinmx(UGeckoInstruction inst) @@ -1432,15 +1465,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if (gpr.R(s).IsImm()) + if (gpr.IsImm(s)) { - u32 result = gpr.R(s).Imm32(); + u32 result = gpr.Imm32(s); if (inst.SH != 0) result = Common::RotateLeft(result, inst.SH); result &= MakeRotationMask(inst.MB, inst.ME); gpr.SetImmediate32(a, result); if (inst.Rc) - ComputeRC(gpr.R(a)); + ComputeRC(a); } else { @@ -1455,59 +1488,64 @@ void Jit64::rlwinmx(UGeckoInstruction inst) bool needs_sext = true; int mask_size = inst.ME - inst.MB + 1; - gpr.Lock(a, s); - gpr.BindToRegister(a, a == s); - if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3) + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + + if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3) { - LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0)); + LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0)); } // common optimized case: byte/word extract else if (simple_mask && !(inst.SH & (mask_size - 1))) { - MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0)); + MOVZX(32, mask_size, Ra, Rs.ExtractWithByteOffset(inst.SH ? 
(32 - inst.SH) >> 3 : 0)); needs_sext = false; } // another optimized special case: byte/word extract plus shift else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift && ((mask >> inst.SH) == 0xff || (mask >> inst.SH) == 0xffff)) { - MOVZX(32, mask_size, gpr.RX(a), gpr.R(s)); - SHL(32, gpr.R(a), Imm8(inst.SH)); + MOVZX(32, mask_size, Ra, Rs); + SHL(32, Ra, Imm8(inst.SH)); needs_sext = inst.SH + mask_size >= 32; } else { if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rs); if (left_shift) { - SHL(32, gpr.R(a), Imm8(inst.SH)); + SHL(32, Ra, Imm8(inst.SH)); } else if (right_shift) { - SHR(32, gpr.R(a), Imm8(inst.MB)); + SHR(32, Ra, Imm8(inst.MB)); needs_sext = false; } else { if (inst.SH != 0) - ROL(32, gpr.R(a), Imm8(inst.SH)); + ROL(32, Ra, Imm8(inst.SH)); if (!(inst.MB == 0 && inst.ME == 31)) { // we need flags if we're merging the branch if (inst.Rc && CheckMergedBranch(0)) - AND(32, gpr.R(a), Imm32(mask)); + AND(32, Ra, Imm32(mask)); else - AndWithMask(gpr.RX(a), mask); + AndWithMask(Ra, mask); needs_sext = inst.MB == 0; needs_test = false; } } } + + Rs.Unlock(); + Ra.Unlock(); + if (inst.Rc) - ComputeRC(gpr.R(a), needs_test, needs_sext); - gpr.UnlockAll(); + ComputeRC(a, needs_test, needs_sext); } } @@ -1518,17 +1556,16 @@ void Jit64::rlwimix(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if (gpr.R(a).IsImm() && gpr.R(s).IsImm()) + if (gpr.IsImm(a, s)) { const u32 mask = MakeRotationMask(inst.MB, inst.ME); - gpr.SetImmediate32(a, (gpr.R(a).Imm32() & ~mask) | - (Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask)); + gpr.SetImmediate32(a, + (gpr.Imm32(a) & ~mask) | (Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); if (inst.Rc) - ComputeRC(gpr.R(a)); + ComputeRC(a); } else { - gpr.Lock(a, s); const u32 mask = MakeRotationMask(inst.MB, inst.ME); bool needs_test = false; if (mask == 0 || (a == s && inst.SH == 0)) @@ -1537,79 +1574,90 @@ void Jit64::rlwimix(UGeckoInstruction inst) } else if (mask == 0xFFFFFFFF) { - gpr.BindToRegister(a, a == s, true); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RegCache::Realize(Rs, Ra); if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rs); if (inst.SH) - ROL(32, gpr.R(a), Imm8(inst.SH)); + ROL(32, Ra, Imm8(inst.SH)); needs_test = true; } - else if (gpr.R(s).IsImm()) + else if (gpr.IsImm(s)) { - gpr.BindToRegister(a, true, true); - AndWithMask(gpr.RX(a), ~mask); - OR(32, gpr.R(a), Imm32(Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask)); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Ra); + AndWithMask(Ra, ~mask); + OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); } else if (inst.SH) { bool isLeftShift = mask == 0U - (1U << inst.SH); bool isRightShift = mask == (1U << inst.SH) - 1; - if (gpr.R(a).IsImm()) + if (gpr.IsImm(a)) { - u32 maskA = gpr.R(a).Imm32() & ~mask; - gpr.BindToRegister(a, false, true); - MOV(32, gpr.R(a), gpr.R(s)); + u32 maskA = gpr.Imm32(a) & ~mask; + + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + + MOV(32, Ra, Rs); if (isLeftShift) { - SHL(32, gpr.R(a), Imm8(inst.SH)); + SHL(32, Ra, Imm8(inst.SH)); } else if (isRightShift) { - SHR(32, gpr.R(a), Imm8(32 - inst.SH)); + SHR(32, Ra, Imm8(32 - inst.SH)); } else { - ROL(32, gpr.R(a), Imm8(inst.SH)); - AND(32, gpr.R(a), Imm32(mask)); + ROL(32, Ra, Imm8(inst.SH)); + AND(32, Ra, Imm32(mask)); } - OR(32, gpr.R(a), Imm32(maskA)); + OR(32, Ra, Imm32(maskA)); } else { // TODO: common cases of this might 
be faster with pinsrb or abuse of AH - gpr.BindToRegister(a, true, true); - MOV(32, R(RSCRATCH), gpr.R(s)); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + + MOV(32, R(RSCRATCH), Rs); if (isLeftShift) { SHL(32, R(RSCRATCH), Imm8(inst.SH)); - AndWithMask(gpr.RX(a), ~mask); - OR(32, gpr.R(a), R(RSCRATCH)); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); } else if (isRightShift) { SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); - AndWithMask(gpr.RX(a), ~mask); - OR(32, gpr.R(a), R(RSCRATCH)); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); } else { ROL(32, R(RSCRATCH), Imm8(inst.SH)); - XOR(32, R(RSCRATCH), gpr.R(a)); + XOR(32, R(RSCRATCH), Ra); AndWithMask(RSCRATCH, mask); - XOR(32, gpr.R(a), R(RSCRATCH)); + XOR(32, Ra, R(RSCRATCH)); } } } else { - gpr.BindToRegister(a, true, true); - XOR(32, gpr.R(a), gpr.R(s)); - AndWithMask(gpr.RX(a), ~mask); - XOR(32, gpr.R(a), gpr.R(s)); + RCX64Reg Rs = gpr.Bind(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + XOR(32, Ra, Rs); + AndWithMask(Ra, ~mask); + XOR(32, Ra, Rs); } if (inst.Rc) - ComputeRC(gpr.R(a), needs_test); - gpr.UnlockAll(); + ComputeRC(a, needs_test); } } @@ -1620,32 +1668,32 @@ void Jit64::rlwnmx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; const u32 mask = MakeRotationMask(inst.MB, inst.ME); - if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + if (gpr.IsImm(b, s)) { - gpr.SetImmediate32(a, Common::RotateLeft(gpr.R(s).Imm32(), gpr.R(b).Imm32() & 0x1F) & mask); + gpr.SetImmediate32(a, Common::RotateLeft(gpr.Imm32(s), gpr.Imm32(b) & 0x1F) & mask); } else { - // no register choice - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - MOV(32, R(ECX), gpr.R(b)); - gpr.BindToRegister(a, (a == s), true); + RCX64Reg ecx = gpr.Scratch(ECX); // no register choice + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(ecx, Ra, Rb, Rs); + + MOV(32, ecx, Rb); if (a != s) { - MOV(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rs); } - ROL(32, gpr.R(a), R(ECX)); + ROL(32, Ra, ecx); // we need flags if we're merging the branch if (inst.Rc && CheckMergedBranch(0)) - AND(32, gpr.R(a), Imm32(mask)); + AND(32, Ra, Imm32(mask)); else - AndWithMask(gpr.RX(a), mask); + AndWithMask(Ra, mask); } if (inst.Rc) - ComputeRC(gpr.R(a), false); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(a, false); } void Jit64::negx(UGeckoInstruction inst) @@ -1655,25 +1703,26 @@ void Jit64::negx(UGeckoInstruction inst) int a = inst.RA; int d = inst.RD; - if (gpr.R(a).IsImm()) + if (gpr.IsImm(a)) { - gpr.SetImmediate32(d, ~(gpr.R(a).Imm32()) + 1); + gpr.SetImmediate32(d, ~(gpr.Imm32(a)) + 1); if (inst.OE) - GenerateConstantOverflow(gpr.R(d).Imm32() == 0x80000000); + GenerateConstantOverflow(gpr.Imm32(d) == 0x80000000); } else { - gpr.Lock(a, d); - gpr.BindToRegister(d, a == d, true); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (a != d) - MOV(32, gpr.R(d), gpr.R(a)); - NEG(32, gpr.R(d)); + MOV(32, Rd, Ra); + NEG(32, Rd); if (inst.OE) GenerateOverflow(); } if (inst.Rc) - ComputeRC(gpr.R(d), false); - gpr.UnlockAll(); + ComputeRC(d, false); } void Jit64::srwx(UGeckoInstruction inst) @@ -1684,29 +1733,27 @@ void Jit64::srwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + if (gpr.IsImm(b, s)) { - u32 amount = gpr.R(b).Imm32(); - 
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.R(s).Imm32() >> (amount & 0x1f))); + u32 amount = gpr.Imm32(b); + gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.Imm32(s) >> (amount & 0x1f))); } else { - // no register choice - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - MOV(32, R(ECX), gpr.R(b)); - gpr.BindToRegister(a, a == s, true); + RCX64Reg ecx = gpr.Scratch(ECX); // no register choice + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(ecx, Ra, Rb, Rs); + + MOV(32, ecx, Rb); if (a != s) - { - MOV(32, gpr.R(a), gpr.R(s)); - } - SHR(64, gpr.R(a), R(ECX)); + MOV(32, Ra, Rs); + SHR(64, Ra, ecx); } // Shift of 0 doesn't update flags, so we need to test just in case if (inst.Rc) - ComputeRC(gpr.R(a)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(a); } void Jit64::slwx(UGeckoInstruction inst) @@ -1717,34 +1764,35 @@ void Jit64::slwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + if (gpr.IsImm(b, s)) { - u32 amount = gpr.R(b).Imm32(); - gpr.SetImmediate32(a, (amount & 0x20) ? 0 : gpr.R(s).Imm32() << (amount & 0x1f)); + u32 amount = gpr.Imm32(b); + gpr.SetImmediate32(a, (amount & 0x20) ? 0 : gpr.Imm32(s) << (amount & 0x1f)); if (inst.Rc) - ComputeRC(gpr.R(a)); + ComputeRC(a); } else { - // no register choice - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - MOV(32, R(ECX), gpr.R(b)); - gpr.BindToRegister(a, a == s, true); + RCX64Reg ecx = gpr.Scratch(ECX); // no register choice + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(ecx, Ra, Rb, Rs); + + MOV(32, ecx, Rb); if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); - SHL(64, gpr.R(a), R(ECX)); + MOV(32, Ra, Rs); + SHL(64, Ra, ecx); if (inst.Rc) { - AND(32, gpr.R(a), gpr.R(a)); - ComputeRC(gpr.R(a), false); + AND(32, Ra, Ra); + RegCache::Unlock(ecx, Ra, Rb, Rs); + ComputeRC(a, false); } else { - MOVZX(64, 32, gpr.RX(a), gpr.R(a)); + MOVZX(64, 32, Ra, Ra); } - gpr.UnlockAll(); - gpr.UnlockAllX(); } } @@ -1757,29 +1805,32 @@ void Jit64::srawx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - gpr.FlushLockX(ECX); - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - MOV(32, R(ECX), gpr.R(b)); - if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); - SHL(64, gpr.R(a), Imm8(32)); - SAR(64, gpr.R(a), R(ECX)); - if (js.op->wantsCA) { - MOV(32, R(RSCRATCH), gpr.R(a)); - SHR(64, gpr.R(a), Imm8(32)); - TEST(32, gpr.R(a), R(RSCRATCH)); + RCX64Reg ecx = gpr.Scratch(ECX); // no register choice + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(ecx, Ra, Rb, Rs); + + MOV(32, ecx, Rb); + if (a != s) + MOV(32, Ra, Rs); + SHL(64, Ra, Imm8(32)); + SAR(64, Ra, ecx); + if (js.op->wantsCA) + { + MOV(32, R(RSCRATCH), Ra); + SHR(64, Ra, Imm8(32)); + TEST(32, Ra, R(RSCRATCH)); + } + else + { + SHR(64, Ra, Imm8(32)); + } + FinalizeCarry(CC_NZ); } - else - { - SHR(64, gpr.R(a), Imm8(32)); - } - FinalizeCarry(CC_NZ); if (inst.Rc) - ComputeRC(gpr.R(a)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + ComputeRC(a); } void Jit64::srawix(UGeckoInstruction inst) @@ -1792,49 +1843,51 @@ void Jit64::srawix(UGeckoInstruction inst) if (amount != 0) { - gpr.Lock(a, s); - gpr.BindToRegister(a, a == s, true); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Ra, Rs); + if 
(!js.op->wantsCA) { if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); - SAR(32, gpr.R(a), Imm8(amount)); + MOV(32, Ra, Rs); + SAR(32, Ra, Imm8(amount)); } else { - MOV(32, R(RSCRATCH), gpr.R(s)); + MOV(32, R(RSCRATCH), Rs); if (a != s) - MOV(32, gpr.R(a), R(RSCRATCH)); + MOV(32, Ra, R(RSCRATCH)); // some optimized common cases that can be done in slightly fewer ops if (amount == 1) { SHR(32, R(RSCRATCH), Imm8(31)); // sign - AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry) - SAR(32, gpr.R(a), Imm8(1)); + AND(32, R(RSCRATCH), Ra); // (sign && carry) + SAR(32, Ra, Imm8(1)); MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001 } else { - SAR(32, gpr.R(a), Imm8(amount)); + SAR(32, Ra, Imm8(amount)); SHL(32, R(RSCRATCH), Imm8(32 - amount)); - TEST(32, R(RSCRATCH), gpr.R(a)); + TEST(32, R(RSCRATCH), Ra); FinalizeCarry(CC_NZ); } } } else { - gpr.Lock(a, s); FinalizeCarry(false); - gpr.BindToRegister(a, a == s, true); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Ra, Rs); if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); + MOV(32, Ra, Rs); } if (inst.Rc) - ComputeRC(gpr.R(a)); - gpr.UnlockAll(); + ComputeRC(a); } // count leading zeroes @@ -1846,39 +1899,40 @@ void Jit64::cntlzwx(UGeckoInstruction inst) int s = inst.RS; bool needs_test = false; - if (gpr.R(s).IsImm()) + if (gpr.IsImm(s)) { u32 mask = 0x80000000; u32 i = 0; for (; i < 32; i++, mask >>= 1) { - if (gpr.R(s).Imm32() & mask) + if (gpr.Imm32(s) & mask) break; } gpr.SetImmediate32(a, i); } else { - gpr.Lock(a, s); - gpr.BindToRegister(a, a == s, true); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Ra, Rs); + if (cpu_info.bLZCNT) { - LZCNT(32, gpr.RX(a), gpr.R(s)); + LZCNT(32, Ra, Rs); needs_test = true; } else { - BSR(32, gpr.RX(a), gpr.R(s)); + BSR(32, Ra, Rs); FixupBranch gotone = J_CC(CC_NZ); - MOV(32, gpr.R(a), Imm32(63)); + MOV(32, Ra, Imm32(63)); SetJumpTarget(gotone); - XOR(32, gpr.R(a), Imm8(0x1f)); // flip order + XOR(32, Ra, Imm8(0x1f)); // flip order } } if (inst.Rc) - ComputeRC(gpr.R(a), needs_test, false); - gpr.UnlockAll(); + ComputeRC(a, needs_test, false); } void Jit64::twX(UGeckoInstruction inst) @@ -1890,13 +1944,17 @@ if (inst.OPCD == 3) // twi { - gpr.KillImmediate(a, true, false); - CMP(32, gpr.R(a), Imm32((s32)(s16)inst.SIMM_16)); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::Read); + RegCache::Realize(Ra); + CMP(32, Ra, Imm32((s32)(s16)inst.SIMM_16)); } else // tw { - gpr.BindToRegister(a, true, false); - CMP(32, gpr.R(a), gpr.R(inst.RB)); + s32 b = inst.RB; + RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RegCache::Realize(Ra, Rb); + CMP(32, Ra, Rb); } constexpr std::array<CCFlags, 5> conditions{{CC_A, CC_B, CC_E, CC_G, CC_L}}; @@ -1912,17 +1970,22 @@ } FixupBranch dont_trap = J(); - for (const FixupBranch& fixup : fixups) { - SetJumpTarget(fixup); + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + + for (const FixupBranch& fixup : fixups) + { + SetJumpTarget(fixup); + } + LOCK(); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); + + gpr.Flush(); + fpr.Flush(); + + WriteExceptionExit(); } - LOCK(); - OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); - - gpr.Flush(RegCache::FlushMode::MaintainState); - fpr.Flush(RegCache::FlushMode::MaintainState); - - WriteExceptionExit(); SetJumpTarget(dont_trap); diff --git
a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index bb58075107..0c6bb929a2 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -18,7 +18,7 @@ #include "Core/CoreTiming.h" #include "Core/HW/CPU.h" #include "Core/HW/Memmap.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" @@ -126,12 +126,14 @@ void Jit64::lXXx(UGeckoInstruction inst) js.op[2].inst.hex == 0x4182fff8) { s32 offset = (s32)(s16)inst.SIMM_16; - gpr.BindToRegister(a, true, false); - gpr.BindToRegister(d, false, true); - SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, CallerSavedRegistersInUse(), signExtend); + RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + + SafeLoadToReg(Rd, Ra, accessSize, offset, CallerSavedRegistersInUse(), signExtend); // if it's still 0, we can wait until the next event - TEST(32, gpr.R(d), gpr.R(d)); + TEST(32, Rd, Rd); FixupBranch noIdle = J_CC(CC_NZ); BitSet32 registersInUse = CallerSavedRegistersInUse(); @@ -155,7 +157,7 @@ void Jit64::lXXx(UGeckoInstruction inst) // Determine whether this instruction updates inst.RA bool update; if (inst.OPCD == 31) - update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.R(b).IsImm() || gpr.R(b).Imm32() != 0); + update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.IsImm(b) || gpr.Imm32(b) != 0); else update = ((inst.OPCD & 1) != 0) && inst.SIMM_16 != 0; @@ -165,19 +167,20 @@ void Jit64::lXXx(UGeckoInstruction inst) bool storeAddress = false; s32 loadOffset = 0; + // Prepare result + RCX64Reg Rd = jo.memcheck ? gpr.RevertableBind(d, RCMode::Write) : gpr.Bind(d, RCMode::Write); + // Prepare address operand - OpArg opAddress; + RCOpArg opAddress; if (!update && !a) { if (indexed) { - if (!gpr.R(b).IsImm()) - gpr.BindToRegister(b, true, false); - opAddress = gpr.R(b); + opAddress = gpr.BindOrImm(b, RCMode::Read); } else { - opAddress = Imm32((u32)(s32)inst.SIMM_16); + opAddress = RCOpArg::Imm32((u32)(s32)inst.SIMM_16); } } else if (update && ((a == 0) || (d == a))) @@ -186,36 +189,40 @@ void Jit64::lXXx(UGeckoInstruction inst) } else { - if (!indexed && gpr.R(a).IsImm() && !jo.memcheck) + if (!indexed && gpr.IsImm(a) && !jo.memcheck) { - u32 val = gpr.R(a).Imm32() + inst.SIMM_16; - opAddress = Imm32(val); + u32 val = gpr.Imm32(a) + inst.SIMM_16; + opAddress = RCOpArg::Imm32(val); if (update) gpr.SetImmediate32(a, val); } - else if (indexed && gpr.R(a).IsImm() && gpr.R(b).IsImm() && !jo.memcheck) + else if (indexed && gpr.IsImm(a) && gpr.IsImm(b) && !jo.memcheck) { - u32 val = gpr.R(a).Imm32() + gpr.R(b).Imm32(); - opAddress = Imm32(val); + u32 val = gpr.Imm32(a) + gpr.Imm32(b); + opAddress = RCOpArg::Imm32(val); if (update) gpr.SetImmediate32(a, val); } else { // If we're using reg+reg mode and b is an immediate, pretend we're using constant offset mode - bool use_constant_offset = !indexed || gpr.R(b).IsImm(); + const bool use_constant_offset = !indexed || gpr.IsImm(b); s32 offset = 0; if (use_constant_offset) - offset = indexed ? gpr.R(b).SImm32() : (s32)inst.SIMM_16; + offset = indexed ? gpr.SImm32(b) : (s32)inst.SIMM_16; + + RCOpArg Rb = use_constant_offset ? 
RCOpArg{} : gpr.Use(b, RCMode::Read); + // Depending on whether we have an immediate and/or update, find the optimum way to calculate // the load address. if ((update || use_constant_offset) && !jo.memcheck) { - gpr.BindToRegister(a, true, update); - opAddress = gpr.R(a); + opAddress = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); + RegCache::Realize(opAddress, Rb); + if (!use_constant_offset) - ADD(32, opAddress, gpr.R(b)); + ADD(32, opAddress, Rb); else if (update) ADD(32, opAddress, Imm32((u32)offset)); else @@ -223,51 +230,36 @@ void Jit64::lXXx(UGeckoInstruction inst) } else { - // In this case we need an extra temporary register. - opAddress = R(RSCRATCH2); storeAddress = true; + // In this case we need an extra temporary register. + opAddress = RCOpArg::R(RSCRATCH2); + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RegCache::Realize(opAddress, Ra, Rb); + if (use_constant_offset) - MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset)); + MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)offset)); else - MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b)); + MOV_sum(32, RSCRATCH2, Ra, Rb); } } } - gpr.Lock(a, b, d); - - if (update && storeAddress) - gpr.BindToRegister(a, true, true); - - // A bit of an evil hack here. We need to retain the original value of this register for the - // exception path, but we'd rather not needlessly pass it around if we don't have to, since - // the exception path is very rare. So we store the value in the regcache, let the load path - // clobber it, then restore the value in the exception path. - // TODO: no other load has to do this at the moment, since no other loads go directly to the - // target registers, but if that ever changes, we need to do it there too. - if (jo.memcheck) - { - gpr.StoreFromRegister(d); - js.revertGprLoad = d; - } - gpr.BindToRegister(d, false, true); + RCX64Reg Ra = (update && storeAddress) ? gpr.Bind(a, RCMode::Write) : RCX64Reg{}; + RegCache::Realize(opAddress, Ra, Rd); BitSet32 registersInUse = CallerSavedRegistersInUse(); // We need to save the (usually scratch) address register for the update. if (update && storeAddress) registersInUse[RSCRATCH2] = true; - SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); + SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend); if (update && storeAddress) - MOV(32, gpr.R(a), opAddress); + MOV(32, Ra, opAddress); // TODO: support no-swap in SafeLoadToReg instead if (byte_reversed) - BSWAP(accessSize, gpr.RX(d)); - - gpr.UnlockAll(); - gpr.UnlockAllX(); + BSWAP(accessSize, Rd); } void Jit64::dcbx(UGeckoInstruction inst) @@ -277,10 +269,12 @@ void Jit64::dcbx(UGeckoInstruction inst) X64Reg addr = RSCRATCH; X64Reg value = RSCRATCH2; - X64Reg tmp = gpr.GetFreeXReg(); - gpr.FlushLockX(tmp); + RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0); + RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read); + RCX64Reg tmp = gpr.Scratch(); + RegCache::Realize(Ra, Rb, tmp); - MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB)); + MOV_sum(32, addr, Ra, Rb); // Check whether a JIT cache line needs to be invalidated. 
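Note: the check that follows is a bitmap probe. Roughly one bit per cache line of guest address space records whether the JIT has compiled code from that line, so dcbx only needs the expensive far-code invalidation path when the bit is set. A plain C++ sketch of the idea behind the emitted sequence; the granularity and shift constants are assumptions for illustration, not necessarily Dolphin's exact block-cache layout:

#include <cstdint>

// addr << 3 folds away the top three bits of the effective address (the
// MScaled LEA just below); the next shift selects a 32-byte cache line,
// whose bit is then looked up in a word-addressed bitmap.
bool LineMayHoldJitCode(const uint32_t* valid_lines, uint32_t effective_addr)
{
  const uint32_t line = (effective_addr << 3) >> (3 + 5);  // one bit per 32-byte line
  return (valid_lines[line >> 5] >> (line & 31)) & 1;      // test this line's bit
}

When the bit is clear, the emitted code falls through without invalidating anything.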
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits) @@ -305,8 +299,6 @@ void Jit64::dcbx(UGeckoInstruction inst) c = J(true); SwitchToNearCode(); SetJumpTarget(c); - - gpr.UnlockAllX(); } void Jit64::dcbt(UGeckoInstruction inst) @@ -338,10 +330,14 @@ void Jit64::dcbz(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; - MOV(32, R(RSCRATCH), gpr.R(b)); - if (a) - ADD(32, R(RSCRATCH), gpr.R(a)); - AND(32, R(RSCRATCH), Imm32(~31)); + { + RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RegCache::Realize(Ra, Rb); + + MOV_sum(32, RSCRATCH, Ra, Rb); + AND(32, R(RSCRATCH), Imm32(~31)); + } if (MSR.DR) { @@ -407,10 +403,14 @@ void Jit64::stX(UGeckoInstruction inst) } // If we already know the address of the write - if (!a || gpr.R(a).IsImm()) + if (!a || gpr.IsImm(a)) { - u32 addr = (a ? gpr.R(a).Imm32() : 0) + offset; - bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse()); + const u32 addr = (a ? gpr.Imm32(a) : 0) + offset; + const bool exception = [&] { + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse()); + }(); if (update) { if (!jo.memcheck || !exception) @@ -419,42 +419,35 @@ void Jit64::stX(UGeckoInstruction inst) } else { - gpr.KillImmediate(a, true, true); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite); + RegCache::Realize(Ra); MemoryExceptionCheck(); - ADD(32, gpr.R(a), Imm32((u32)offset)); + ADD(32, Ra, Imm32((u32)offset)); } } } else { - gpr.Lock(a, s); - gpr.BindToRegister(a, true, update); - if (gpr.R(s).IsImm()) + RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); + RCOpArg reg_value; + if (!gpr.IsImm(s) && WriteClobbersRegValue(accessSize, /* swap */ true)) { - SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), - SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + reg_value = RCOpArg::R(RSCRATCH2); + MOV(32, reg_value, Rs); } else { - X64Reg reg_value; - if (WriteClobbersRegValue(accessSize, /* swap */ true)) - { - MOV(32, R(RSCRATCH2), gpr.R(s)); - reg_value = RSCRATCH2; - } - else - { - gpr.BindToRegister(s, true, false); - reg_value = gpr.RX(s); - } - SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), - SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); + reg_value = gpr.BindOrImm(s, RCMode::Read); } + RegCache::Realize(Ra, reg_value); + SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(), + SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); if (update) - ADD(32, gpr.R(a), Imm32((u32)offset)); + ADD(32, Ra, Imm32((u32)offset)); } - gpr.UnlockAll(); } void Jit64::stXx(UGeckoInstruction inst) @@ -467,13 +460,6 @@ void Jit64::stXx(UGeckoInstruction inst) bool byte_reverse = !!(inst.SUBOP10 & 512); FALLBACK_IF(!a || (update && a == s) || (update && jo.memcheck && a == b)); - gpr.Lock(a, b, s); - - if (update) - gpr.BindToRegister(a, true, true); - - MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b)); - int accessSize; switch (inst.SUBOP10 & ~32) { @@ -494,39 +480,28 @@ void Jit64::stXx(UGeckoInstruction inst) break; } - if (gpr.R(s).IsImm()) + const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse); + + RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = does_clobber ? 
gpr.Use(s, RCMode::Read) : gpr.BindOrImm(s, RCMode::Read); + RegCache::Realize(Ra, Rb, Rs); + + MOV_sum(32, RSCRATCH2, Ra, Rb); + + if (!Rs.IsImm() && does_clobber) { - BitSet32 registersInUse = CallerSavedRegistersInUse(); - if (update) - registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse, - byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); - } - else - { - X64Reg reg_value; - if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse)) - { - MOV(32, R(RSCRATCH), gpr.R(s)); - reg_value = RSCRATCH; - } - else - { - gpr.BindToRegister(s, true, false); - reg_value = gpr.RX(s); - } - BitSet32 registersInUse = CallerSavedRegistersInUse(); - if (update) - registersInUse[RSCRATCH2] = true; - SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse, - byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); + MOV(32, R(RSCRATCH), Rs); + Rs = RCOpArg::R(RSCRATCH); } + BitSet32 registersInUse = CallerSavedRegistersInUse(); + if (update) + registersInUse[RSCRATCH2] = true; + SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse, + byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); if (update) - MOV(32, gpr.R(a), R(RSCRATCH2)); - - gpr.UnlockAll(); - gpr.UnlockAllX(); + MOV(32, Ra, R(RSCRATCH2)); } // A few games use these heavily in video codecs. @@ -535,18 +510,22 @@ void Jit64::lmw(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); + int a = inst.RA, d = inst.RD; + // TODO: This doesn't handle rollback on DSI correctly - MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16)); - if (inst.RA) - ADD(32, R(RSCRATCH2), gpr.R(inst.RA)); - for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, - CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false); - gpr.BindToRegister(i, false, true); - MOV(32, gpr.R(i), R(RSCRATCH)); + RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); + RegCache::Realize(Ra); + MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)(s32)inst.SIMM_16)); + } + for (int i = d; i < 32; i++) + { + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4, + CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false); + RCOpArg Ri = gpr.Bind(i, RCMode::Write); + RegCache::Realize(Ri); + MOV(32, Ri, R(RSCRATCH)); } - gpr.UnlockAllX(); } void Jit64::stmw(UGeckoInstruction inst) @@ -554,26 +533,27 @@ void Jit64::stmw(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); + int a = inst.RA, d = inst.RD; + // TODO: This doesn't handle rollback on DSI correctly - for (int i = inst.RD; i < 32; i++) + for (int i = d; i < 32; i++) { - if (inst.RA) - MOV(32, R(RSCRATCH), gpr.R(inst.RA)); - else + RCOpArg Ra = a ? 
gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); + RCOpArg Ri = gpr.Use(i, RCMode::Read); + RegCache::Realize(Ra, Ri); + + if (Ra.IsZero()) XOR(32, R(RSCRATCH), R(RSCRATCH)); - if (gpr.R(i).IsImm()) - { - SafeWriteRegToReg(gpr.R(i), RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, - CallerSavedRegistersInUse()); - } else + MOV(32, R(RSCRATCH), Ra); + if (!Ri.IsImm()) { - MOV(32, R(RSCRATCH2), gpr.R(i)); - SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, - CallerSavedRegistersInUse()); + MOV(32, R(RSCRATCH2), Ri); + Ri = RCOpArg::R(RSCRATCH2); } + SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16, + CallerSavedRegistersInUse()); } - gpr.UnlockAllX(); } void Jit64::eieio(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index e89bf3f9d6..33a9f41a65 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -6,7 +6,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" using namespace Gen; @@ -30,25 +30,27 @@ void Jit64::lfXXX(UGeckoInstruction inst) FALLBACK_IF(!indexed && !a); - gpr.BindToRegister(a, true, update); - s32 offset = 0; - OpArg addr = gpr.R(a); + RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); + RegCache::Realize(addr); + if (update && jo.memcheck) { - addr = R(RSCRATCH2); - MOV(32, addr, gpr.R(a)); + MOV(32, R(RSCRATCH2), addr); + addr = RCOpArg::R(RSCRATCH2); } if (indexed) { + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RegCache::Realize(Rb); if (update) { - ADD(32, addr, gpr.R(b)); + ADD(32, addr, Rb); } else { - addr = R(RSCRATCH2); - MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b)); + MOV_sum(32, RSCRATCH2, a ? addr.Location() : Imm32(0), Rb); + addr = RCOpArg::R(RSCRATCH2); } } else @@ -59,13 +61,9 @@ void Jit64::lfXXX(UGeckoInstruction inst) offset = (s16)inst.SIMM_16; } - fpr.Lock(d); - if (jo.memcheck && single) - { - fpr.StoreFromRegister(d); - js.revertFprLoad = d; - } - fpr.BindToRegister(d, !single); + RCMode Rd_mode = !single ? RCMode::ReadWrite : RCMode::Write; + RCX64Reg Rd = jo.memcheck && single ? 
fpr.RevertableBind(d, Rd_mode) : fpr.Bind(d, Rd_mode); + RegCache::Realize(Rd); BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && jo.memcheck) registersInUse[RSCRATCH2] = true; @@ -73,17 +71,19 @@ void Jit64::lfXXX(UGeckoInstruction inst) if (single) { - ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); + ConvertSingleToDouble(Rd, RSCRATCH, true); } else { MOVQ_xmm(XMM0, R(RSCRATCH)); - MOVSD(fpr.RX(d), R(XMM0)); + MOVSD(Rd, R(XMM0)); } if (update && jo.memcheck) - MOV(32, gpr.R(a), addr); - fpr.UnlockAll(); - gpr.UnlockAll(); + { + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Ra); + MOV(32, Ra, addr); + } } void Jit64::stfXXX(UGeckoInstruction inst) @@ -107,26 +107,31 @@ void Jit64::stfXXX(UGeckoInstruction inst) { if (js.op->fprIsStoreSafe[s]) { - CVTSD2SS(XMM0, fpr.R(s)); + RCOpArg Rs = fpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + CVTSD2SS(XMM0, Rs); } else { - fpr.BindToRegister(s, true, false); - ConvertDoubleToSingle(XMM0, fpr.RX(s)); + RCX64Reg Rs = fpr.Bind(s, RCMode::Read); + RegCache::Realize(Rs); + ConvertDoubleToSingle(XMM0, Rs); } MOVD_xmm(R(RSCRATCH), XMM0); } else { - if (fpr.R(s).IsSimpleReg()) - MOVQ_xmm(R(RSCRATCH), fpr.RX(s)); + RCOpArg Rs = fpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + if (Rs.IsSimpleReg()) + MOVQ_xmm(R(RSCRATCH), Rs.GetSimpleReg()); else - MOV(64, R(RSCRATCH), fpr.R(s)); + MOV(64, R(RSCRATCH), Rs); } - if (!indexed && (!a || gpr.R(a).IsImm())) + if (!indexed && (!a || gpr.IsImm(a))) { - u32 addr = (a ? gpr.R(a).Imm32() : 0) + imm; + u32 addr = (a ? gpr.Imm32(a) : 0) + imm; bool exception = WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse()); @@ -138,33 +143,34 @@ void Jit64::stfXXX(UGeckoInstruction inst) } else { - gpr.KillImmediate(a, true, true); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite); + RegCache::Realize(Ra); MemoryExceptionCheck(); - ADD(32, gpr.R(a), Imm32((u32)imm)); + ADD(32, Ra, Imm32((u32)imm)); } } - fpr.UnlockAll(); - gpr.UnlockAll(); return; } s32 offset = 0; - if (update) - gpr.BindToRegister(a, true, true); + RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read); + RegCache::Realize(Ra); if (indexed) { - MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b)); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RegCache::Realize(Rb); + MOV_sum(32, RSCRATCH2, a ? Ra.Location() : Imm32(0), Rb); } else { if (update) { - LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm)); + MOV_sum(32, RSCRATCH2, Ra, Imm32(imm)); } else { offset = imm; - MOV(32, R(RSCRATCH2), gpr.R(a)); + MOV(32, R(RSCRATCH2), Ra); } } @@ -176,11 +182,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse); if (update) - MOV(32, gpr.R(a), R(RSCRATCH2)); - - fpr.UnlockAll(); - gpr.UnlockAll(); - gpr.UnlockAllX(); + MOV(32, Ra, R(RSCRATCH2)); } // This one is a little bit weird; it stores the low 32 bits of a double without converting it @@ -193,12 +195,16 @@ void Jit64::stfiwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; - MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b)); + RCOpArg Ra = a ? 
gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCOpArg Rs = fpr.Use(s, RCMode::Read); + RegCache::Realize(Ra, Rb, Rs); - if (fpr.R(s).IsSimpleReg()) - MOVD_xmm(R(RSCRATCH), fpr.RX(s)); + MOV_sum(32, RSCRATCH2, Ra, Rb); + + if (Rs.IsSimpleReg()) + MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg()); else - MOV(32, R(RSCRATCH), fpr.R(s)); + MOV(32, R(RSCRATCH), Rs); SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse()); - gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index bf42fa3774..05bbfd6e6c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -9,7 +9,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/PowerPC.h" @@ -40,21 +40,22 @@ void Jit64::psq_stXX(UGeckoInstruction inst) bool gqrIsConstant = it != js.constantGqr.end(); u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0; - gpr.Lock(a, b); - gpr.FlushLockX(RSCRATCH_EXTRA); - if (update) - gpr.BindToRegister(a, true, true); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read); + RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset); + RCOpArg Rs = fpr.Use(s, RCMode::Read); + RegCache::Realize(scratch_guard, Ra, Rb, Rs); - MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset)); + MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb); // In memcheck mode, don't update the address until the exception check if (update && !jo.memcheck) - MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); + MOV(32, Ra, R(RSCRATCH_EXTRA)); if (w) - CVTSD2SS(XMM0, fpr.R(s)); // one + CVTSD2SS(XMM0, Rs); // one else - CVTPD2PS(XMM0, fpr.R(s)); // pair + CVTPD2PS(XMM0, Rs); // pair if (gqrIsConstant) { @@ -104,13 +105,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst) if (update && jo.memcheck) { - if (indexed) - ADD(32, gpr.R(a), gpr.R(b)); - else - ADD(32, gpr.R(a), Imm32((u32)offset)); + ADD(32, Ra, Rb); } - gpr.UnlockAll(); - gpr.UnlockAllX(); } void Jit64::psq_lXX(UGeckoInstruction inst) @@ -135,17 +131,17 @@ void Jit64::psq_lXX(UGeckoInstruction inst) bool gqrIsConstant = it != js.constantGqr.end(); u32 gqrValue = gqrIsConstant ? it->second >> 16 : 0; - gpr.Lock(a, b); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); + RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset); + RCX64Reg Rs = fpr.Bind(s, RCMode::Write); + RegCache::Realize(scratch_guard, Ra, Rb, Rs); - gpr.FlushLockX(RSCRATCH_EXTRA); - gpr.BindToRegister(a, true, update); - fpr.BindToRegister(s, false, true); - - MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? 
gpr.R(b) : Imm32((u32)offset)); + MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb); // In memcheck mode, don't update the address until the exception check if (update && !jo.memcheck) - MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); + MOV(32, Ra, R(RSCRATCH_EXTRA)); if (gqrIsConstant) { @@ -169,15 +165,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst) CALLptr(MatR(RSCRATCH)); } - CVTPS2PD(fpr.RX(s), R(XMM0)); + CVTPS2PD(Rs, R(XMM0)); if (update && jo.memcheck) { - if (indexed) - ADD(32, gpr.R(a), gpr.R(b)); - else - ADD(32, gpr.R(a), Imm32((u32)offset)); + ADD(32, Ra, Rb); } - - gpr.UnlockAll(); - gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 45ea288137..f75489be86 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -7,7 +7,7 @@ #include "Common/MsgHandler.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" using namespace Gen; @@ -22,8 +22,10 @@ void Jit64::ps_mr(UGeckoInstruction inst) if (d == b) return; - fpr.BindToRegister(d, false); - MOVAPD(fpr.RX(d), fpr.R(b)); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Rb, Rd); + MOVAPD(Rd, Rb); } void Jit64::ps_sum(UGeckoInstruction inst) @@ -36,43 +38,46 @@ void Jit64::ps_sum(UGeckoInstruction inst) int a = inst.FA; int b = inst.FB; int c = inst.FC; - fpr.Lock(a, b, c, d); - OpArg op_a = fpr.R(a); - fpr.BindToRegister(d, d == b || d == c); + + RCOpArg Ra = fpr.Use(a, RCMode::Read); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCOpArg Rc = fpr.Use(c, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rc, Rd); + X64Reg tmp = XMM1; - MOVDDUP(tmp, op_a); // {a.ps0, a.ps0} - ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1} + MOVDDUP(tmp, Ra); // {a.ps0, a.ps0} + ADDPD(tmp, Rb); // {a.ps0 + b.ps0, a.ps0 + b.ps1} switch (inst.SUBOP5) { case 10: // ps_sum0: {a.ps0 + b.ps1, c.ps1} - UNPCKHPD(tmp, fpr.R(c)); + UNPCKHPD(tmp, Rc); break; case 11: // ps_sum1: {c.ps0, a.ps0 + b.ps1} - if (fpr.R(c).IsSimpleReg()) + if (Rc.IsSimpleReg()) { if (cpu_info.bSSE4_1) { - BLENDPD(tmp, fpr.R(c), 1); + BLENDPD(tmp, Rc, 1); } else { - MOVAPD(XMM0, fpr.R(c)); + MOVAPD(XMM0, Rc); SHUFPD(XMM0, R(tmp), 2); tmp = XMM0; } } else { - MOVLPD(tmp, fpr.R(c)); + MOVLPD(tmp, Rc); } break; default: PanicAlert("ps_sum WTF!!!"); } - HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1); - ForceSinglePrecision(fpr.RX(d), fpr.R(d)); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); + HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? 
XMM0 : XMM1); + ForceSinglePrecision(Rd, Rd); + SetFPRFIfNeeded(Rd); } void Jit64::ps_muls(UGeckoInstruction inst) @@ -85,26 +90,29 @@ void Jit64::ps_muls(UGeckoInstruction inst) int a = inst.FA; int c = inst.FC; bool round_input = !js.op->fprIsSingle[c]; - fpr.Lock(a, c, d); + + RCOpArg Ra = fpr.Use(a, RCMode::Read); + RCOpArg Rc = fpr.Use(c, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rc, Rd); + switch (inst.SUBOP5) { case 12: // ps_muls0 - MOVDDUP(XMM1, fpr.R(c)); + MOVDDUP(XMM1, Rc); break; case 13: // ps_muls1 - avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3); + avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3); break; default: PanicAlert("ps_muls WTF!!!"); } if (round_input) Force25BitPrecision(XMM1, R(XMM1), XMM0); - MULPD(XMM1, fpr.R(a)); - fpr.BindToRegister(d, false); - HandleNaNs(inst, fpr.RX(d), XMM1); - ForceSinglePrecision(fpr.RX(d), fpr.R(d)); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); + MULPD(XMM1, Ra); + HandleNaNs(inst, Rd, XMM1); + ForceSinglePrecision(Rd, Rd); + SetFPRFIfNeeded(Rd); } void Jit64::ps_mergeXX(UGeckoInstruction inst) @@ -116,27 +124,29 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst) int d = inst.FD; int a = inst.FA; int b = inst.FB; - fpr.Lock(a, b, d); - fpr.BindToRegister(d, d == a || d == b); + + RCOpArg Ra = fpr.Use(a, RCMode::Read); + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); switch (inst.SUBOP10) { case 528: - avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, fpr.RX(d), fpr.R(a), fpr.R(b)); + avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, Rd, Ra, Rb); break; // 00 case 560: - avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 2); + avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 2); break; // 01 case 592: - avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 1); + avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 1); break; // 10 case 624: - avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, fpr.RX(d), fpr.R(a), fpr.R(b)); + avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, Rd, Ra, Rb); break; // 11 default: ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op"); } - fpr.UnlockAll(); } void Jit64::ps_rsqrte(UGeckoInstruction inst) @@ -147,23 +157,21 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - gpr.FlushLockX(RSCRATCH_EXTRA); - fpr.Lock(b, d); - fpr.BindToRegister(b, true, false); - fpr.BindToRegister(d, false); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCX64Reg Rb = fpr.Bind(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(scratch_guard, Rb, Rd); - MOVSD(XMM0, fpr.R(b)); + MOVSD(XMM0, Rb); CALL(asm_routines.frsqrte); - MOVSD(fpr.R(d), XMM0); + MOVSD(Rd, XMM0); - MOVHLPS(XMM0, fpr.RX(b)); + MOVHLPS(XMM0, Rb); CALL(asm_routines.frsqrte); - MOVLHPS(fpr.RX(d), XMM0); + MOVLHPS(Rd, XMM0); - ForceSinglePrecision(fpr.RX(d), fpr.R(d)); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); - gpr.UnlockAllX(); + ForceSinglePrecision(Rd, Rd); + SetFPRFIfNeeded(Rd); } void Jit64::ps_res(UGeckoInstruction inst) @@ -174,23 +182,21 @@ void Jit64::ps_res(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - gpr.FlushLockX(RSCRATCH_EXTRA); - fpr.Lock(b, d); - fpr.BindToRegister(b, true, false); - fpr.BindToRegister(d, false); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); + RCX64Reg Rb = fpr.Bind(b, RCMode::Read); + RCX64Reg Rd = fpr.Bind(d, 
RCMode::Write); + RegCache::Realize(scratch_guard, Rb, Rd); - MOVSD(XMM0, fpr.R(b)); + MOVSD(XMM0, Rb); CALL(asm_routines.fres); - MOVSD(fpr.R(d), XMM0); + MOVSD(Rd, XMM0); - MOVHLPS(XMM0, fpr.RX(b)); + MOVHLPS(XMM0, Rb); CALL(asm_routines.fres); - MOVLHPS(fpr.RX(d), XMM0); + MOVLHPS(Rd, XMM0); - ForceSinglePrecision(fpr.RX(d), fpr.R(d)); - SetFPRFIfNeeded(fpr.RX(d)); - fpr.UnlockAll(); - gpr.UnlockAllX(); + ForceSinglePrecision(Rd, Rd); + SetFPRFIfNeeded(Rd); } void Jit64::ps_cmpXX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 37631239fe..2ff1c37986 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -9,7 +9,7 @@ #include "Core/CoreTiming.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PowerPC.h" @@ -219,26 +219,32 @@ void Jit64::mtspr(UGeckoInstruction inst) break; case SPR_XER: - gpr.Lock(d); - gpr.BindToRegister(d, true, false); - MOV(32, R(RSCRATCH), gpr.R(d)); + { + RCX64Reg Rd = gpr.Bind(d, RCMode::Read); + RegCache::Realize(Rd); + + MOV(32, R(RSCRATCH), Rd); AND(32, R(RSCRATCH), Imm32(0xff7f)); MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH)); - MOV(32, R(RSCRATCH), gpr.R(d)); + MOV(32, R(RSCRATCH), Rd); SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); AND(8, R(RSCRATCH), Imm8(1)); MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); - MOV(32, R(RSCRATCH), gpr.R(d)); + MOV(32, R(RSCRATCH), Rd); SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT)); MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH)); - gpr.UnlockAll(); + return; + } case SPR_HID0: { - MOV(32, R(RSCRATCH), gpr.R(d)); + RCOpArg Rd = gpr.Use(d, RCMode::Read); + RegCache::Realize(Rd); + + MOV(32, R(RSCRATCH), Rd); BTR(32, R(RSCRATCH), Imm8(31 - 20)); // ICFI MOV(32, PPCSTATE(spr[iIndex]), R(RSCRATCH)); FixupBranch dont_reset_icache = J_CC(CC_NC); @@ -255,13 +261,9 @@ void Jit64::mtspr(UGeckoInstruction inst) } // OK, this is easy. - if (!gpr.R(d).IsImm()) - { - gpr.Lock(d); - gpr.BindToRegister(d, true, false); - } - MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d)); - gpr.UnlockAll(); + RCOpArg Rd = gpr.BindOrImm(d, RCMode::Read); + RegCache::Realize(Rd); + MOV(32, PPCSTATE(spr[iIndex]), Rd); } void Jit64::mfspr(UGeckoInstruction inst) @@ -281,22 +283,23 @@ void Jit64::mfspr(UGeckoInstruction inst) // redundant for the JIT. // no register choice - gpr.FlushLockX(RDX, RAX); - gpr.FlushLockX(RCX); + RCX64Reg rdx = gpr.Scratch(RDX); + RCX64Reg rax = gpr.Scratch(RAX); + RCX64Reg rcx = gpr.Scratch(RCX); - MOV(64, R(RCX), ImmPtr(&CoreTiming::g)); + MOV(64, rcx, ImmPtr(&CoreTiming::g)); // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. // Scale downcount by the CPU overclocking factor. 
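+ // Note: downcount is kept in overclock-scaled cycles, so the float multiply by + // last_OC_factor_inverted below converts it back to cycles at the nominal clock rate.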
CVTSI2SS(XMM0, PPCSTATE(downcount)); - MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted))); - CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor - MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length))); - SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - - // Scaled_downcount) - ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer))); - SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks))); + MULSS(XMM0, MDisp(rcx, offsetof(CoreTiming::Globals, last_OC_factor_inverted))); + CVTSS2SI(rdx, R(XMM0)); // RDX is downcount scaled by the overclocking factor + MOV(32, rax, MDisp(rcx, offsetof(CoreTiming::Globals, slice_length))); + SUB(64, rax, rdx); // cycles since the last CoreTiming::Advance() event is (slicelength - + // Scaled_downcount) + ADD(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, global_timer))); + SUB(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_ticks))); // It might seem convenient to correct the timer for the block position here for even more // accurate // timing, but as of currently, this can break games. If we end up reading a time *after* the @@ -307,15 +310,15 @@ void Jit64::mfspr(UGeckoInstruction inst) // Revolution, // which won't get past the loading screen. // if (js.downcountAmount) - // ADD(64, R(RAX), Imm32(js.downcountAmount)); + // ADD(64, rax, Imm32(js.downcountAmount)); // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 - MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL)); - MUL(64, R(RDX)); - MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value))); - SHR(64, R(RDX), Imm8(3)); - ADD(64, R(RAX), R(RDX)); - MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); + MOV(64, rdx, Imm64(0xAAAAAAAAAAAAAAABULL)); + MUL(64, rdx); + MOV(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_value))); + SHR(64, rdx, Imm8(3)); + ADD(64, rax, rdx); + MOV(64, PPCSTATE(spr[SPR_TL]), rax); if (CanMergeNextInstructions(1)) { @@ -330,40 +333,42 @@ void Jit64::mfspr(UGeckoInstruction inst) { js.downcountAmount++; js.skipInstructions = 1; - gpr.Lock(d, n); - gpr.BindToRegister(d, false); - gpr.BindToRegister(n, false); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RCX64Reg Rn = gpr.Bind(n, RCMode::Write); + RegCache::Realize(Rd, Rn); if (iIndex == SPR_TL) - MOV(32, gpr.R(d), R(RAX)); + MOV(32, Rd, rax); if (nextIndex == SPR_TL) - MOV(32, gpr.R(n), R(RAX)); - SHR(64, R(RAX), Imm8(32)); + MOV(32, Rn, rax); + SHR(64, rax, Imm8(32)); if (iIndex == SPR_TU) - MOV(32, gpr.R(d), R(RAX)); + MOV(32, Rd, rax); if (nextIndex == SPR_TU) - MOV(32, gpr.R(n), R(RAX)); + MOV(32, Rn, rax); break; } } - gpr.Lock(d); - gpr.BindToRegister(d, false); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); if (iIndex == SPR_TU) - SHR(64, R(RAX), Imm8(32)); - MOV(32, gpr.R(d), R(RAX)); + SHR(64, rax, Imm8(32)); + MOV(32, Rd, rax); break; } case SPR_XER: - gpr.Lock(d); - gpr.BindToRegister(d, false); - MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl)); + { + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); + MOVZX(32, 16, Rd, PPCSTATE(xer_stringctrl)); MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca)); SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); - OR(32, gpr.R(d), R(RSCRATCH)); + OR(32, Rd, R(RSCRATCH)); MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov)); SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT)); - OR(32, gpr.R(d), R(RSCRATCH)); + OR(32, Rd, R(RSCRATCH)); break; + } case SPR_WPAR: case SPR_DEC: case SPR_PMC1: 
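A side note on the sequence above: MUL(64, rdx) leaves the high 64 bits of rax * 0xAAAAAAAAAAAAAAAB in rdx, so the following SHR(64, rdx, Imm8(3)) completes the shift by 67 that the comment describes. The identity can be sanity-checked on the host with a short sketch, assuming a compiler that provides unsigned __int128 (GCC/Clang):

#include <cassert>
#include <cstdint>

// Multiply-high by the magic constant, then shift: the same steps the emitted code performs.
uint64_t DivideBy12(uint64_t a)
{
  const auto product = static_cast<unsigned __int128>(a) * 0xAAAAAAAAAAAAAAABULL;
  return static_cast<uint64_t>(product >> 67);  // >> 64 takes the high half, then >> 3 more
}

int main()
{
  for (uint64_t a : {0ULL, 1ULL, 11ULL, 12ULL, 13ULL, 0xFFFFFFFFFFFFFFFFULL})
    assert(DivideBy12(a) == a / 12);
  return 0;
}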
@@ -372,26 +377,25 @@ void Jit64::mfspr(UGeckoInstruction inst) case SPR_PMC4: FALLBACK_IF(true); default: - gpr.Lock(d); - gpr.BindToRegister(d, false); - MOV(32, gpr.R(d), PPCSTATE(spr[iIndex])); + { + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); + MOV(32, Rd, PPCSTATE(spr[iIndex])); break; } - gpr.UnlockAllX(); - gpr.UnlockAll(); + } } void Jit64::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - if (!gpr.R(inst.RS).IsImm()) + { - gpr.Lock(inst.RS); - gpr.BindToRegister(inst.RS, true, false); + RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); + RegCache::Realize(Rs); + MOV(32, PPCSTATE(msr), Rs); } - MOV(32, PPCSTATE(msr), gpr.R(inst.RS)); - gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); @@ -430,10 +434,9 @@ void Jit64::mfmsr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); // Privileged? - gpr.Lock(inst.RD); - gpr.BindToRegister(inst.RD, false, true); - MOV(32, gpr.R(inst.RD), PPCSTATE(msr)); - gpr.UnlockAll(); + RCX64Reg Rd = gpr.Bind(inst.RD, RCMode::Write); + RegCache::Realize(Rd); + MOV(32, Rd, PPCSTATE(msr)); } void Jit64::mftb(UGeckoInstruction inst) @@ -448,13 +451,13 @@ void Jit64::mfcr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); int d = inst.RD; - gpr.FlushLockX(RSCRATCH_EXTRA); + + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); CALL(asm_routines.mfcr); - gpr.Lock(d); - gpr.BindToRegister(d, false, true); - MOV(32, gpr.R(d), R(RSCRATCH)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); + MOV(32, Rd, R(RSCRATCH)); } void Jit64::mtcrf(UGeckoInstruction inst) @@ -466,13 +469,13 @@ void Jit64::mtcrf(UGeckoInstruction inst) u32 crm = inst.CRM; if (crm != 0) { - if (gpr.R(inst.RS).IsImm()) + if (gpr.IsImm(inst.RS)) { for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) { - u8 newcr = (gpr.R(inst.RS).Imm32() >> (28 - (i * 4))) & 0xF; + u8 newcr = (gpr.Imm32(inst.RS) >> (28 - (i * 4))) & 0xF; u64 newcrval = PowerPC::PPCCRToInternal(newcr); if ((s64)newcrval == (s32)newcrval) { @@ -489,13 +492,13 @@ void Jit64::mtcrf(UGeckoInstruction inst) else { MOV(64, R(RSCRATCH2), ImmPtr(PowerPC::m_crTable.data())); - gpr.Lock(inst.RS); - gpr.BindToRegister(inst.RS, true, false); + RCX64Reg Rs = gpr.Bind(inst.RS, RCMode::Read); + RegCache::Realize(Rs); for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(RSCRATCH), gpr.R(inst.RS)); + MOV(32, R(RSCRATCH), Rs); if (i != 7) SHR(32, R(RSCRATCH), Imm8(28 - (i * 4))); if (i != 0) @@ -504,7 +507,6 @@ void Jit64::mtcrf(UGeckoInstruction inst) MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH)); } } - gpr.UnlockAll(); } } } @@ -653,11 +655,12 @@ void Jit64::mffsx(UGeckoInstruction inst) MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); int d = inst.FD; - fpr.BindToRegister(d, false, true); + RCX64Reg Rd = fpr.Bind(d, RCMode::Write); + RegCache::Realize(Rd); MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000)); OR(64, R(RSCRATCH), R(RSCRATCH2)); MOVQ_xmm(XMM0, R(RSCRATCH)); - MOVSD(fpr.RX(d), R(XMM0)); + MOVSD(Rd, R(XMM0)); } // MXCSR = s_fpscr_to_mxcsr[FPSCR & 7] @@ -751,10 +754,14 @@ void Jit64::mtfsfx(UGeckoInstruction inst) } int b = inst.FB; - if (fpr.R(b).IsSimpleReg()) - MOVQ_xmm(R(RSCRATCH), fpr.RX(b)); + + RCOpArg Rb = fpr.Use(b, RCMode::Read); + RegCache::Realize(Rb); + + if (Rb.IsSimpleReg()) + MOVQ_xmm(R(RSCRATCH), Rb.GetSimpleReg()); else - MOV(32, R(RSCRATCH), fpr.R(b)); + MOV(32, R(RSCRATCH), Rb); MOV(32, R(RSCRATCH2), PPCSTATE(fpscr)); AND(32, 
R(RSCRATCH), Imm32(mask)); diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h new file mode 100644 index 0000000000..3c7d5b4912 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h @@ -0,0 +1,284 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +#include "Common/Assert.h" +#include "Common/CommonTypes.h" +#include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64/RegCache/RCMode.h" + +using preg_t = size_t; + +class PPCCachedReg +{ +public: + enum class LocationType + { + /// Value is currently at its default location + Default, + /// Value is currently bound to an x64 register + Bound, + /// Value is known as an immediate and has not been written back to its default location + Immediate, + /// Value is known as an immediate and is already present at its default location + SpeculativeImmediate, + }; + + PPCCachedReg() = default; + + explicit PPCCachedReg(Gen::OpArg default_location_) + : default_location(default_location_), location(default_location_) + { + } + + const Gen::OpArg& Location() const { return location; } + + LocationType GetLocationType() const + { + if (!away) + { + ASSERT(!revertable); + + if (location.IsImm()) + return LocationType::SpeculativeImmediate; + + ASSERT(location == default_location); + return LocationType::Default; + } + + ASSERT(location.IsImm() || location.IsSimpleReg()); + return location.IsImm() ? LocationType::Immediate : LocationType::Bound; + } + + bool IsAway() const { return away; } + bool IsBound() const { return GetLocationType() == LocationType::Bound; } + + void SetBoundTo(Gen::X64Reg xreg) + { + away = true; + location = Gen::R(xreg); + } + + void SetFlushed() + { + ASSERT(!revertable); + away = false; + location = default_location; + } + + void SetToImm32(u32 imm32, bool dirty = true) + { + away |= dirty; + location = Gen::Imm32(imm32); + } + + bool IsRevertable() const { return revertable; } + void SetRevertable() + { + ASSERT(IsBound()); + revertable = true; + } + void SetRevert() + { + ASSERT(revertable); + revertable = false; + SetFlushed(); + } + void SetCommit() + { + ASSERT(revertable); + revertable = false; + } + + bool IsLocked() const { return locked > 0; } + void Lock() { locked++; } + void Unlock() + { + ASSERT(IsLocked()); + locked--; + } + +private: + Gen::OpArg default_location{}; + Gen::OpArg location{}; + bool away = false; // value not in source register + bool revertable = false; + size_t locked = 0; +}; + +class X64CachedReg +{ +public: + preg_t Contents() const { return ppcReg; } + + void SetBoundTo(preg_t ppcReg_, bool dirty_) + { + free = false; + ppcReg = ppcReg_; + dirty = dirty_; + } + + void SetFlushed() + { + ppcReg = static_cast<preg_t>(Gen::INVALID_REG); + free = true; + dirty = false; + } + + bool IsFree() const { return free && !locked; } + + bool IsDirty() const { return dirty; } + void MakeDirty() { dirty = true; } + + bool IsLocked() const { return locked > 0; } + void Lock() { locked++; } + void Unlock() + { + ASSERT(IsLocked()); + locked--; + } + +private: + preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG); + bool free = true; + bool dirty = false; + size_t locked = 0; +}; + +class RCConstraint +{ +public: + bool IsRealized() const { return realized != RealizedLoc::Invalid; } + bool IsActive() const + { + return IsRealized() || write || read || kill_imm || kill_mem || revertable; + } + + bool ShouldLoad() const { return read; } + bool 
ShouldDirty() const { return write; } + bool ShouldBeRevertable() const { return revertable; } + bool ShouldKillImmediate() const { return kill_imm; } + bool ShouldKillMemory() const { return kill_mem; } + + enum class RealizedLoc + { + Invalid, + Bound, + Imm, + Mem, + }; + + void Realized(RealizedLoc loc) + { + realized = loc; + ASSERT(IsRealized()); + } + + enum class ConstraintLoc + { + Bound, + BoundOrImm, + BoundOrMem, + Any, + }; + + void AddUse(RCMode mode) { AddConstraint(mode, ConstraintLoc::Any, false); } + void AddUseNoImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrMem, false); } + void AddBindOrImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrImm, false); } + void AddBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, false); } + void AddRevertableBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, true); } + +private: + void AddConstraint(RCMode mode, ConstraintLoc loc, bool should_revertable) + { + if (IsRealized()) + { + ASSERT(IsCompatible(mode, loc, should_revertable)); + return; + } + + if (should_revertable) + revertable = true; + + switch (loc) + { + case ConstraintLoc::Bound: + kill_imm = true; + kill_mem = true; + break; + case ConstraintLoc::BoundOrImm: + kill_mem = true; + break; + case ConstraintLoc::BoundOrMem: + kill_imm = true; + break; + case ConstraintLoc::Any: + break; + } + + switch (mode) + { + case RCMode::Read: + read = true; + break; + case RCMode::Write: + write = true; + break; + case RCMode::ReadWrite: + read = true; + write = true; + break; + } + } + + bool IsCompatible(RCMode mode, ConstraintLoc loc, bool should_revertable) const + { + if (should_revertable && !revertable) + { + return false; + } + + const bool is_loc_compatible = [&] { + switch (loc) + { + case ConstraintLoc::Bound: + return realized == RealizedLoc::Bound; + case ConstraintLoc::BoundOrImm: + return realized == RealizedLoc::Bound || realized == RealizedLoc::Imm; + case ConstraintLoc::BoundOrMem: + return realized == RealizedLoc::Bound || realized == RealizedLoc::Mem; + case ConstraintLoc::Any: + return true; + } + ASSERT(false); + return false; + }(); + + const bool is_mode_compatible = [&] { + switch (mode) + { + case RCMode::Read: + return read; + case RCMode::Write: + return write; + case RCMode::ReadWrite: + return read && write; + } + ASSERT(false); + return false; + }(); + + return is_loc_compatible && is_mode_compatible; + } + + RealizedLoc realized = RealizedLoc::Invalid; + bool write = false; + bool read = false; + bool kill_imm = false; + bool kill_mem = false; + bool revertable = false; +}; diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp similarity index 96% rename from Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp rename to Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index 9077f388c8..f671afb74c 100644 --- a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. 
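Stepping back to RCConstraint above for a moment, before the renamed register-cache files below: constraints accumulate on a guest register until it is realized, after which any further request only asserts compatibility. Roughly, in terms of the names defined above (illustrative only):

// A Bind(Write) followed by a Use(Read) on the same guest register:
RCConstraint c;
c.AddBind(RCMode::Write);  // ConstraintLoc::Bound: sets kill_imm, kill_mem and write
c.AddUse(RCMode::Read);    // ConstraintLoc::Any: additionally sets read
// Realization must therefore bind the register, load it, and mark it dirty:
// ShouldLoad(), ShouldDirty(), ShouldKillImmediate() and ShouldKillMemory() all return true.
c.Realized(RCConstraint::RealizedLoc::Bound);
// Any later AddBind()/AddUse() on this register now only ASSERTs IsCompatible(...).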
-#include "Core/PowerPC/Jit64/FPURegCache.h" +#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h similarity index 92% rename from Source/Core/Core/PowerPC/Jit64/FPURegCache.h rename to Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h index 1ecbde2bea..fd7d2bb53c 100644 --- a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h @@ -4,7 +4,7 @@ #pragma once -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" class Jit64; @@ -12,9 +12,9 @@ class FPURegCache final : public RegCache { public: explicit FPURegCache(Jit64& jit); - Gen::OpArg GetDefaultLocation(preg_t preg) const override; protected: + Gen::OpArg GetDefaultLocation(preg_t preg) const override; void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override; void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override; const Gen::X64Reg* GetAllocationOrder(size_t* count) const override; diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp similarity index 97% rename from Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp rename to Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp index 18b0e70602..1671d37a51 100644 --- a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. -#include "Core/PowerPC/Jit64/GPRRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h similarity index 92% rename from Source/Core/Core/PowerPC/Jit64/GPRRegCache.h rename to Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h index 0b383cf94f..a80182ad92 100644 --- a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h @@ -4,7 +4,7 @@ #pragma once -#include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" class Jit64; @@ -12,10 +12,10 @@ class GPRRegCache final : public RegCache { public: explicit GPRRegCache(Jit64& jit); - Gen::OpArg GetDefaultLocation(preg_t preg) const override; void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true); protected: + Gen::OpArg GetDefaultLocation(preg_t preg) const override; void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override; void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override; const Gen::X64Reg* GetAllocationOrder(size_t* count) const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp new file mode 100644 index 0000000000..9b7fc14cd9 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp @@ -0,0 +1,729 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <limits> +#include <utility> +#include <variant> + +#include "Common/Assert.h" +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" +#include "Common/MsgHandler.h" +#include "Common/VariantUtil.h" +#include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64/Jit.h" +#include "Core/PowerPC/Jit64/RegCache/CachedReg.h" +#include "Core/PowerPC/Jit64/RegCache/RCMode.h" +#include "Core/PowerPC/PowerPC.h" + +using namespace Gen; +using namespace PowerPC; + +RCOpArg RCOpArg::Imm32(u32 imm) +{ + return RCOpArg{imm}; +} + +RCOpArg RCOpArg::R(X64Reg xr) +{ + return RCOpArg{xr}; +} + +RCOpArg::RCOpArg() = default; + +RCOpArg::RCOpArg(u32 imm) : rc(nullptr), contents(imm) +{ +} + +RCOpArg::RCOpArg(X64Reg xr) : rc(nullptr), contents(xr) +{ +} + +RCOpArg::RCOpArg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg) +{ + rc->Lock(preg); +} + +RCOpArg::~RCOpArg() +{ + Unlock(); +} + +RCOpArg::RCOpArg(RCOpArg&& other) noexcept + : rc(std::exchange(other.rc, nullptr)), + contents(std::exchange(other.contents, std::monostate{})) +{ +} + +RCOpArg& RCOpArg::operator=(RCOpArg&& other) noexcept +{ + Unlock(); + rc = std::exchange(other.rc, nullptr); + contents = std::exchange(other.contents, std::monostate{}); + return *this; +} + +RCOpArg::RCOpArg(RCX64Reg&& other) noexcept + : rc(std::exchange(other.rc, nullptr)), + contents(VariantCast(std::exchange(other.contents, std::monostate{}))) +{ +} + +RCOpArg& RCOpArg::operator=(RCX64Reg&& other) noexcept +{ + Unlock(); + rc = std::exchange(other.rc, nullptr); + contents = VariantCast(std::exchange(other.contents, std::monostate{})); + return *this; +} + +void RCOpArg::Realize() +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + rc->Realize(*preg); + } +} + +OpArg RCOpArg::Location() const +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + ASSERT(rc->IsRealized(*preg)); + return rc->R(*preg); + } + else if (const X64Reg* xr = std::get_if<X64Reg>(&contents)) + { + return Gen::R(*xr); + } + else if (const u32* imm = std::get_if<u32>(&contents)) + { + return Gen::Imm32(*imm); + } + ASSERT(false); + return {}; +} + +OpArg RCOpArg::ExtractWithByteOffset(int offset) +{ + if (offset == 0) + return Location(); + + ASSERT(rc); + const preg_t preg = std::get<preg_t>(contents); + rc->StoreFromRegister(preg, RegCache::FlushMode::MaintainState); + OpArg result = rc->GetDefaultLocation(preg); + result.AddMemOffset(offset); + return result; +} + +void RCOpArg::Unlock() +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + ASSERT(rc); + rc->Unlock(*preg); + } + else if (const X64Reg* xr = std::get_if<X64Reg>(&contents)) + { + // If rc, we got this from an RCX64Reg. + // If !rc, we got this from RCOpArg::R. 
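+ // In other words: an RCOpArg that was moved from an RCX64Reg still owns the lock on that + // scratch register and must release it here; a bare RCOpArg::R(xr) wrapper owns nothing.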
+ if (rc) + rc->UnlockX(*xr); + } + else + { + ASSERT(!rc); + } + + rc = nullptr; + contents = std::monostate{}; +} + +bool RCOpArg::IsImm() const +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + return rc->R(*preg).IsImm(); + } + else if (std::holds_alternative<u32>(contents)) + { + return true; + } + return false; +} + +s32 RCOpArg::SImm32() const +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + return rc->R(*preg).SImm32(); + } + else if (const u32* imm = std::get_if<u32>(&contents)) + { + return static_cast<s32>(*imm); + } + ASSERT(false); + return 0; +} + +u32 RCOpArg::Imm32() const +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + return rc->R(*preg).Imm32(); + } + else if (const u32* imm = std::get_if<u32>(&contents)) + { + return *imm; + } + ASSERT(false); + return 0; +} + +RCX64Reg::RCX64Reg() = default; + +RCX64Reg::RCX64Reg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg) +{ + rc->Lock(preg); +} + +RCX64Reg::RCX64Reg(RegCache* rc_, X64Reg xr) : rc(rc_), contents(xr) +{ + rc->LockX(xr); +} + +RCX64Reg::~RCX64Reg() +{ + Unlock(); +} + +RCX64Reg::RCX64Reg(RCX64Reg&& other) noexcept + : rc(std::exchange(other.rc, nullptr)), + contents(std::exchange(other.contents, std::monostate{})) +{ +} + +RCX64Reg& RCX64Reg::operator=(RCX64Reg&& other) noexcept +{ + Unlock(); + rc = std::exchange(other.rc, nullptr); + contents = std::exchange(other.contents, std::monostate{}); + return *this; +} + +void RCX64Reg::Realize() +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + rc->Realize(*preg); + } +} + +RCX64Reg::operator X64Reg() const & +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + ASSERT(rc->IsRealized(*preg)); + return rc->RX(*preg); + } + else if (const X64Reg* xr = std::get_if<X64Reg>(&contents)) + { + return *xr; + } + ASSERT(false); + return {}; +} + +RCX64Reg::operator OpArg() const & +{ + return Gen::R(RCX64Reg::operator X64Reg()); +} + +void RCX64Reg::Unlock() +{ + if (const preg_t* preg = std::get_if<preg_t>(&contents)) + { + ASSERT(rc); + rc->Unlock(*preg); + } + else if (const X64Reg* xr = std::get_if<X64Reg>(&contents)) + { + ASSERT(rc); + rc->UnlockX(*xr); + } + else + { + ASSERT(!rc); + } + + rc = nullptr; + contents = std::monostate{}; +} + +RCForkGuard::RCForkGuard(RegCache& rc_) : rc(&rc_), m_regs(rc_.m_regs), m_xregs(rc_.m_xregs) +{ + ASSERT(!rc->IsAnyConstraintActive()); +} + +RCForkGuard::RCForkGuard(RCForkGuard&& other) noexcept + : rc(other.rc), m_regs(std::move(other.m_regs)), m_xregs(std::move(other.m_xregs)) +{ + other.rc = nullptr; +} + +void RCForkGuard::EndFork() +{ + if (!rc) + return; + + ASSERT(!rc->IsAnyConstraintActive()); + rc->m_regs = m_regs; + rc->m_xregs = m_xregs; + rc = nullptr; +} + +RegCache::RegCache(Jit64& jit) : m_jit{jit} +{ +} + +void RegCache::Start() +{ + m_xregs.fill({}); + for (size_t i = 0; i < m_regs.size(); i++) + { + m_regs[i] = PPCCachedReg{GetDefaultLocation(i)}; + } +} + +void RegCache::SetEmitter(XEmitter* emitter) +{ + m_emitter = emitter; +} + +bool RegCache::SanityCheck() const +{ + for (size_t i = 0; i < m_regs.size(); i++) + { + switch (m_regs[i].GetLocationType()) + { + case PPCCachedReg::LocationType::Default: + case PPCCachedReg::LocationType::SpeculativeImmediate: + case PPCCachedReg::LocationType::Immediate: + break; + case PPCCachedReg::LocationType::Bound: + { + if (m_regs[i].IsLocked() || m_regs[i].IsRevertable()) + return false; + + Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg(); + if (m_xregs[xr].IsLocked()) + return false; + if (m_xregs[xr].Contents() != i) + return false; + break; + } + } + } + 
return true; +} + +RCOpArg RegCache::Use(preg_t preg, RCMode mode) +{ + m_constraints[preg].AddUse(mode); + return RCOpArg{this, preg}; +} + +RCOpArg RegCache::UseNoImm(preg_t preg, RCMode mode) +{ + m_constraints[preg].AddUseNoImm(mode); + return RCOpArg{this, preg}; +} + +RCOpArg RegCache::BindOrImm(preg_t preg, RCMode mode) +{ + m_constraints[preg].AddBindOrImm(mode); + return RCOpArg{this, preg}; +} + +RCX64Reg RegCache::Bind(preg_t preg, RCMode mode) +{ + m_constraints[preg].AddBind(mode); + return RCX64Reg{this, preg}; +} + +RCX64Reg RegCache::RevertableBind(preg_t preg, RCMode mode) +{ + m_constraints[preg].AddRevertableBind(mode); + return RCX64Reg{this, preg}; +} + +RCX64Reg RegCache::Scratch() +{ + return Scratch(GetFreeXReg()); +} + +RCX64Reg RegCache::Scratch(X64Reg xr) +{ + FlushX(xr); + return RCX64Reg{this, xr}; +} + +RCForkGuard RegCache::Fork() +{ + return RCForkGuard{*this}; +} + +void RegCache::Flush(BitSet32 pregs) +{ + ASSERT_MSG( + DYNA_REC, + std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }), + "Someone forgot to unlock a X64 reg"); + + for (preg_t i : pregs) + { + ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), + "Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i)); + ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!"); + + switch (m_regs[i].GetLocationType()) + { + case PPCCachedReg::LocationType::Default: + break; + case PPCCachedReg::LocationType::SpeculativeImmediate: + // We can have a cached value without a host register through speculative constants. + // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE, + // if PPCSTATE is modified externally (e.g. fallback to interpreter). + m_regs[i].SetFlushed(); + break; + case PPCCachedReg::LocationType::Bound: + case PPCCachedReg::LocationType::Immediate: + StoreFromRegister(i); + break; + } + } +} + +void RegCache::Revert() +{ + ASSERT(IsAllUnlocked()); + for (auto& reg : m_regs) + { + if (reg.IsRevertable()) + reg.SetRevert(); + } +} + +void RegCache::Commit() +{ + ASSERT(IsAllUnlocked()); + for (auto& reg : m_regs) + { + if (reg.IsRevertable()) + reg.SetCommit(); + } +} + +bool RegCache::IsAllUnlocked() const +{ + return std::none_of(m_regs.begin(), m_regs.end(), [](const auto& r) { return r.IsLocked(); }) && + std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }) && + !IsAnyConstraintActive(); +} + +void RegCache::PreloadRegisters(BitSet32 to_preload) +{ + for (preg_t preg : to_preload) + { + if (NumFreeRegisters() < 2) + return; + if (!R(preg).IsImm()) + BindToRegister(preg, true, false); + } +} + +BitSet32 RegCache::RegistersInUse() const +{ + BitSet32 result; + for (size_t i = 0; i < m_xregs.size(); i++) + { + if (!m_xregs[i].IsFree()) + result[i] = true; + } + return result; +} + +void RegCache::FlushX(X64Reg reg) +{ + ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg); + ASSERT(!m_xregs[reg].IsLocked()); + if (!m_xregs[reg].IsFree()) + { + StoreFromRegister(m_xregs[reg].Contents()); + } +} + +void RegCache::DiscardRegContentsIfCached(preg_t preg) +{ + if (m_regs[preg].IsBound()) + { + X64Reg xr = m_regs[preg].Location().GetSimpleReg(); + m_xregs[xr].SetFlushed(); + m_regs[preg].SetFlushed(); + } +} + +void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) +{ + if (!m_regs[i].IsBound()) + { + X64Reg xr = GetFreeXReg(); + + ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr); + ASSERT_MSG(DYNA_REC, 
!m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register"); + ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state"); + + m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway()); + + if (doLoad) + { + LoadRegister(i, xr); + } + + ASSERT_MSG(DYNA_REC, + std::none_of(m_regs.begin(), m_regs.end(), + [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }), + "Xreg %i already bound", xr); + + m_regs[i].SetBoundTo(xr); + } + else + { + // reg location must be simplereg; memory locations + // and immediates are taken care of above. + if (makeDirty) + m_xregs[RX(i)].MakeDirty(); + } + + ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed"); +} + +void RegCache::StoreFromRegister(preg_t i, FlushMode mode) +{ + // When a transaction is in progress, allowing the store would overwrite the old value. + ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!"); + + bool doStore = false; + + switch (m_regs[i].GetLocationType()) + { + case PPCCachedReg::LocationType::Default: + case PPCCachedReg::LocationType::SpeculativeImmediate: + return; + case PPCCachedReg::LocationType::Bound: + { + X64Reg xr = RX(i); + doStore = m_xregs[xr].IsDirty(); + if (mode == FlushMode::Full) + m_xregs[xr].SetFlushed(); + break; + } + case PPCCachedReg::LocationType::Immediate: + doStore = true; + break; + } + + if (doStore) + StoreRegister(i, GetDefaultLocation(i)); + if (mode == FlushMode::Full) + m_regs[i].SetFlushed(); +} + +X64Reg RegCache::GetFreeXReg() +{ + size_t aCount; + const X64Reg* aOrder = GetAllocationOrder(&aCount); + for (size_t i = 0; i < aCount; i++) + { + X64Reg xr = aOrder[i]; + if (m_xregs[xr].IsFree()) + { + return xr; + } + } + + // Okay, not found; run the register allocator heuristic and figure out which register we should + // clobber. + float min_score = std::numeric_limits<float>::max(); + X64Reg best_xreg = INVALID_REG; + size_t best_preg = 0; + for (size_t i = 0; i < aCount; i++) + { + X64Reg xreg = (X64Reg)aOrder[i]; + preg_t preg = m_xregs[xreg].Contents(); + if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked()) + continue; + float score = ScoreRegister(xreg); + if (score < min_score) + { + min_score = score; + best_xreg = xreg; + best_preg = preg; + } + } + + if (best_xreg != INVALID_REG) + { + StoreFromRegister(best_preg); + return best_xreg; + } + + // Still no dice? Die! + ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs"); + return INVALID_REG; +} + +int RegCache::NumFreeRegisters() const +{ + int count = 0; + size_t aCount; + const X64Reg* aOrder = GetAllocationOrder(&aCount); + for (size_t i = 0; i < aCount; i++) + if (m_xregs[aOrder[i]].IsFree()) + count++; + return count; +} + +// Estimate roughly how bad it would be to de-allocate this register. Higher score +// means more bad. +float RegCache::ScoreRegister(X64Reg xreg) const +{ + preg_t preg = m_xregs[xreg].Contents(); + float score = 0; + + // If it's not dirty, we don't need a store to write it back to the register file, so + // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly + // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative + // to the number of extra stores it causes. + if (m_xregs[xreg].IsDirty()) + score += 2; + + // If the register isn't actually needed in a physical register for a later instruction, + // writing it back to the register file isn't quite as bad. 
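+ // Worked example of the scoring: a dirty register whose value is needed again after three + // other registers (regs_in_count == 3 below) scores 2 + 1 + 2 * (5 - log2f(4)) == 9.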
+ if (GetRegUtilization()[preg]) + { + // Don't look too far ahead; we don't want to have quadratic compilation times for + // enormous block sizes! + // This actually improves register allocation a tiny bit; I'm not sure why. + u32 lookahead = std::min(m_jit.js.instructionsLeft, 64); + // Count how many other registers are going to be used before we need this one again. + u32 regs_in_count = CountRegsIn(preg, lookahead).Count(); + // Totally ad-hoc heuristic to bias based on how many other registers we'll need + // before this one gets used again. + score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count)); + } + + return score; +} + +const OpArg& RegCache::R(preg_t preg) const +{ + return m_regs[preg].Location(); +} + +X64Reg RegCache::RX(preg_t preg) const +{ + ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg); + return m_regs[preg].Location().GetSimpleReg(); +} + +void RegCache::Lock(preg_t preg) +{ + m_regs[preg].Lock(); +} + +void RegCache::Unlock(preg_t preg) +{ + m_regs[preg].Unlock(); + if (!m_regs[preg].IsLocked()) + { + // Fully unlocked, reset realization state. + m_constraints[preg] = {}; + } +} + +void RegCache::LockX(X64Reg xr) +{ + m_xregs[xr].Lock(); +} + +void RegCache::UnlockX(X64Reg xr) +{ + m_xregs[xr].Unlock(); +} + +bool RegCache::IsRealized(preg_t preg) const +{ + return m_constraints[preg].IsRealized(); +} + +void RegCache::Realize(preg_t preg) +{ + if (m_constraints[preg].IsRealized()) + return; + + const bool load = m_constraints[preg].ShouldLoad(); + const bool dirty = m_constraints[preg].ShouldDirty(); + const bool kill_imm = m_constraints[preg].ShouldKillImmediate(); + const bool kill_mem = m_constraints[preg].ShouldKillMemory(); + + const auto do_bind = [&] { + BindToRegister(preg, load, dirty); + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Bound); + }; + + if (m_constraints[preg].ShouldBeRevertable()) + { + StoreFromRegister(preg, FlushMode::MaintainState); + do_bind(); + m_regs[preg].SetRevertable(); + return; + } + + switch (m_regs[preg].GetLocationType()) + { + case PPCCachedReg::LocationType::Default: + if (kill_mem) + { + do_bind(); + return; + } + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); + return; + case PPCCachedReg::LocationType::Bound: + do_bind(); + return; + case PPCCachedReg::LocationType::Immediate: + case PPCCachedReg::LocationType::SpeculativeImmediate: + if (dirty || kill_imm) + { + do_bind(); + return; + } + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm); + break; + } +} + +bool RegCache::IsAnyConstraintActive() const +{ + return std::any_of(m_constraints.begin(), m_constraints.end(), + [](const auto& c) { return c.IsActive(); }); +} diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h new file mode 100644 index 0000000000..522eb513bb --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h @@ -0,0 +1,222 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstddef> +#include <cstdint> +#include <type_traits> +#include <variant> + +#include "Common/x64Emitter.h" +#include "Core/PowerPC/Jit64/RegCache/CachedReg.h" +#include "Core/PowerPC/PPCAnalyst.h" + +class Jit64; +enum class RCMode; + +class RCOpArg; +class RCX64Reg; +class RegCache; + +using preg_t = size_t; +static constexpr size_t NUM_XREGS = 16; + +class RCOpArg +{ +public: + static RCOpArg Imm32(u32 imm); + static RCOpArg R(Gen::X64Reg xr); + RCOpArg(); + ~RCOpArg(); + RCOpArg(RCOpArg&&) noexcept; + RCOpArg& operator=(RCOpArg&&) noexcept; + + RCOpArg(RCX64Reg&&) noexcept; + RCOpArg& operator=(RCX64Reg&&) noexcept; + + RCOpArg(const RCOpArg&) = delete; + RCOpArg& operator=(const RCOpArg&) = delete; + + void Realize(); + Gen::OpArg Location() const; + operator Gen::OpArg() const & { return Location(); } + operator Gen::OpArg() const && = delete; + bool IsSimpleReg() const { return Location().IsSimpleReg(); } + bool IsSimpleReg(Gen::X64Reg reg) const { return Location().IsSimpleReg(reg); } + Gen::X64Reg GetSimpleReg() const { return Location().GetSimpleReg(); } + + // Use to extract bytes from a register using the regcache. offset is in bytes. + Gen::OpArg ExtractWithByteOffset(int offset); + + void Unlock(); + + bool IsImm() const; + s32 SImm32() const; + u32 Imm32() const; + bool IsZero() const { return IsImm() && Imm32() == 0; } + +private: + friend class RegCache; + + explicit RCOpArg(u32 imm); + explicit RCOpArg(Gen::X64Reg xr); + RCOpArg(RegCache* rc_, preg_t preg); + + RegCache* rc = nullptr; + std::variant<std::monostate, preg_t, Gen::X64Reg, u32> contents; +}; + +class RCX64Reg +{ +public: + RCX64Reg(); + ~RCX64Reg(); + RCX64Reg(RCX64Reg&&) noexcept; + RCX64Reg& operator=(RCX64Reg&&) noexcept; + + RCX64Reg(const RCX64Reg&) = delete; + RCX64Reg& operator=(const RCX64Reg&) = delete; + + void Realize(); + operator Gen::OpArg() const &; + operator Gen::X64Reg() const &; + operator Gen::OpArg() const && = delete; + operator Gen::X64Reg() const && = delete; + + void Unlock(); + +private: + friend class RegCache; + friend class RCOpArg; + + RCX64Reg(RegCache* rc_, preg_t preg); + RCX64Reg(RegCache* rc_, Gen::X64Reg xr); + + RegCache* rc = nullptr; + std::variant<std::monostate, preg_t, Gen::X64Reg> contents; +}; + +class RCForkGuard +{ +public: + ~RCForkGuard() { EndFork(); } + RCForkGuard(RCForkGuard&&) noexcept; + + RCForkGuard(const RCForkGuard&) = delete; + RCForkGuard& operator=(const RCForkGuard&) = delete; + RCForkGuard& operator=(RCForkGuard&&) = delete; + + void EndFork(); + +private: + friend class RegCache; + + explicit RCForkGuard(RegCache& rc_); + + RegCache* rc; + std::array<PPCCachedReg, 32> m_regs; + std::array<X64CachedReg, NUM_XREGS> m_xregs; +}; + +class RegCache +{ +public: + enum class FlushMode + { + Full, + MaintainState, + }; + + explicit RegCache(Jit64& jit); + virtual ~RegCache() = default; + + void Start(); + void SetEmitter(Gen::XEmitter* emitter); + bool SanityCheck() const; + + template <typename... Ts> + static void Realize(Ts&... rc) + { + static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...)); + (rc.Realize(), ...); + } + + template <typename... Ts> + static void Unlock(Ts&... rc) + { + static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...)); + (rc.Unlock(), ...); + } + + template <typename... Args> + bool IsImm(Args... 
pregs) const + { + static_assert(sizeof...(pregs) > 0); + return (R(pregs).IsImm() && ...); + } + u32 Imm32(preg_t preg) const { return R(preg).Imm32(); } + s32 SImm32(preg_t preg) const { return R(preg).SImm32(); } + + RCOpArg Use(preg_t preg, RCMode mode); + RCOpArg UseNoImm(preg_t preg, RCMode mode); + RCOpArg BindOrImm(preg_t preg, RCMode mode); + RCX64Reg Bind(preg_t preg, RCMode mode); + RCX64Reg RevertableBind(preg_t preg, RCMode mode); + RCX64Reg Scratch(); + RCX64Reg Scratch(Gen::X64Reg xr); + + RCForkGuard Fork(); + void Flush(BitSet32 pregs = BitSet32::AllTrue(32)); + void Revert(); + void Commit(); + + bool IsAllUnlocked() const; + + void PreloadRegisters(BitSet32 pregs); + BitSet32 RegistersInUse() const; + +protected: + friend class RCOpArg; + friend class RCX64Reg; + friend class RCForkGuard; + + virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0; + virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0; + virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0; + + virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0; + + virtual BitSet32 GetRegUtilization() const = 0; + virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0; + + void FlushX(Gen::X64Reg reg); + void DiscardRegContentsIfCached(preg_t preg); + void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true); + void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full); + + Gen::X64Reg GetFreeXReg(); + + int NumFreeRegisters() const; + float ScoreRegister(Gen::X64Reg xreg) const; + + const Gen::OpArg& R(preg_t preg) const; + Gen::X64Reg RX(preg_t preg) const; + + void Lock(preg_t preg); + void Unlock(preg_t preg); + void LockX(Gen::X64Reg xr); + void UnlockX(Gen::X64Reg xr); + bool IsRealized(preg_t preg) const; + void Realize(preg_t preg); + + bool IsAnyConstraintActive() const; + + Jit64& m_jit; + std::array<PPCCachedReg, 32> m_regs; + std::array<X64CachedReg, NUM_XREGS> m_xregs; + std::array<RCConstraint, 32> m_constraints; + Gen::XEmitter* m_emitter = nullptr; +}; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h b/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h new file mode 100644 index 0000000000..efe72ac4f5 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h @@ -0,0 +1,12 @@ +// Copyright 2018 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +enum class RCMode +{ + Read, + Write, + ReadWrite, +}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 90cd71ac87..062caee096 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -70,11 +70,6 @@ protected: // so just fixup that branch instead of testing for a DSI again. bool fixupExceptionHandler; Gen::FixupBranch exceptionHandler; - // If these are set, we've stored the old value of a register which will be loaded in - // revertLoad, - // which lets us revert it on the exception path. - int revertGprLoad; - int revertFprLoad; bool assumeNoPairedQuantize; std::map<u8, u32> constantGqr
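As a closing note on the new API: a minimal sketch (hypothetical instruction handler, using only names introduced in this change, mirroring the rewritten handlers such as ps_mr above) of the locking discipline that replaces the old Lock()/BindToRegister()/UnlockAll() sequences:

// Hypothetical Jit64 handler, for illustration only.
void Jit64::example_op(UGeckoInstruction inst)
{
  const int d = inst.RD;
  const int a = inst.RA;

  RCOpArg Ra = gpr.Use(a, RCMode::Read);     // may realize as a bound register, memory, or immediate
  RCX64Reg Rd = gpr.Bind(d, RCMode::Write);  // must realize as a bound host register
  RegCache::Realize(Ra, Rd);                 // applies all accumulated constraints at once

  MOV(32, Rd, Ra);
}  // Ra and Rd unlock themselves on destruction; there is no UnlockAll()/UnlockAllX() to forget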