diff --git a/Source/Core/Common/BitSet.h b/Source/Core/Common/BitSet.h
index 4425b6067c..5154735f86 100644
--- a/Source/Core/Common/BitSet.h
+++ b/Source/Core/Common/BitSet.h
@@ -193,10 +193,14 @@ public:
constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
constexpr BitSet operator~() const { return BitSet(~m_val); }
+ constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); }
+ constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); }
constexpr explicit operator bool() const { return m_val != 0; }
BitSet& operator|=(BitSet other) { return *this = *this | other; }
BitSet& operator&=(BitSet other) { return *this = *this & other; }
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
+ BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
+ BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
// Warning: Even though on modern CPUs this is a single fast instruction,
// Dolphin's official builds do not currently assume POPCNT support on x86,
// so slower explicit bit twiddling is generated. Still should generally
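Note: the two new shift operators mirror the underlying integer shifts and compose with the existing bitwise operators. A minimal usage sketch (the packing mirrors the `CallerSavedRegistersInUse` change in Jit.cpp below; the helper name is illustrative):

```cpp
#include "Common/BitSet.h"

// Pack GPR occupancy into bits 0..15 and FPR occupancy into bits 16..31,
// the same composition the new Jit64::CallerSavedRegistersInUse uses.
BitSet32 PackInUse(BitSet32 gprs_in_use, BitSet32 fprs_in_use)
{
  return gprs_in_use | (fprs_in_use << 16);
}
```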
diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj
index 23752002cf..409546e43b 100644
--- a/Source/Core/Common/Common.vcxproj
+++ b/Source/Core/Common/Common.vcxproj
@@ -159,6 +159,7 @@
+    <ClInclude Include="VariantUtil.h" />
diff --git a/Source/Core/Common/VariantUtil.h b/Source/Core/Common/VariantUtil.h
new file mode 100644
index 0000000000..c865ad9681
--- /dev/null
+++ b/Source/Core/Common/VariantUtil.h
@@ -0,0 +1,26 @@
+// Copyright 2018 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <variant>
+
+namespace detail
+{
+template <typename... From>
+struct VariantCastProxy
+{
+  const std::variant<From...>& v;
+
+  template <typename... To>
+  operator std::variant<To...>() const
+  {
+    return std::visit([](auto&& arg) { return std::variant<To...>{arg}; }, v);
+  }
+};
+}  // namespace detail
+
+template <typename... From>
+auto VariantCast(const std::variant<From...>& v)
+{
+  return detail::VariantCastProxy<From...>{v};
+}
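For clarity, `VariantCast` returns a proxy whose templated conversion operator re-wraps the active alternative, so a variant can be assigned to any variant over a superset of its types. A small usage sketch (the concrete types are arbitrary examples):

```cpp
#include <variant>
#include "Common/VariantUtil.h"

std::variant<int, float> narrow = 5;
// std::visit copies the active alternative (an int here) into the wider type.
std::variant<int, float, double> wide = VariantCast(narrow);
```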
diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 003f2122ec..30ae5e0946 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -236,20 +236,20 @@ if(_M_X86)
DSP/Jit/x64/DSPJitMultiplier.cpp
DSP/Jit/x64/DSPJitTables.cpp
DSP/Jit/x64/DSPJitUtil.cpp
- PowerPC/Jit64/FPURegCache.cpp
- PowerPC/Jit64/GPRRegCache.cpp
- PowerPC/Jit64/Jit64_Tables.cpp
- PowerPC/Jit64/JitAsm.cpp
- PowerPC/Jit64/Jit_Branch.cpp
PowerPC/Jit64/Jit.cpp
+ PowerPC/Jit64/Jit64_Tables.cpp
+ PowerPC/Jit64/Jit_Branch.cpp
PowerPC/Jit64/Jit_FloatingPoint.cpp
PowerPC/Jit64/Jit_Integer.cpp
PowerPC/Jit64/Jit_LoadStore.cpp
PowerPC/Jit64/Jit_LoadStoreFloating.cpp
PowerPC/Jit64/Jit_LoadStorePaired.cpp
PowerPC/Jit64/Jit_Paired.cpp
- PowerPC/Jit64/JitRegCache.cpp
PowerPC/Jit64/Jit_SystemRegisters.cpp
+ PowerPC/Jit64/JitAsm.cpp
+ PowerPC/Jit64/RegCache/FPURegCache.cpp
+ PowerPC/Jit64/RegCache/GPRRegCache.cpp
+ PowerPC/Jit64/RegCache/JitRegCache.cpp
PowerPC/Jit64Common/BlockCache.cpp
PowerPC/Jit64Common/ConstantPool.cpp
PowerPC/Jit64Common/EmuCodeBlock.cpp
diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj
index ae2b84308d..516848d8c2 100644
--- a/Source/Core/Core/Core.vcxproj
+++ b/Source/Core/Core/Core.vcxproj
@@ -241,8 +241,8 @@
-    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
+    <ClCompile Include="PowerPC\Jit64\RegCache\FPURegCache.cpp" />
@@ -260,13 +260,8 @@
-    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
-    <ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
-    <ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
-    <ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp" />
-    <ClCompile Include="PowerPC\Jit64\JitRegCache.cpp" />
@@ -275,7 +270,12 @@
+    <ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
+    <ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp" />
+    <ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
+    <ClCompile Include="PowerPC\Jit64\RegCache\GPRRegCache.cpp" />
+    <ClCompile Include="PowerPC\Jit64\RegCache\JitRegCache.cpp" />
@@ -284,10 +284,6 @@
-    <ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
-    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
-    <ClInclude Include="PowerPC\Jit64\JitAsm.h" />
-    <ClInclude Include="PowerPC\Jit64\JitRegCache.h" />
@@ -295,6 +291,10 @@
+    <ClInclude Include="PowerPC\Jit64\JitAsm.h" />
+    <ClInclude Include="PowerPC\Jit64\RegCache\FPURegCache.h" />
+    <ClInclude Include="PowerPC\Jit64\RegCache\GPRRegCache.h" />
+    <ClInclude Include="PowerPC\Jit64\RegCache\JitRegCache.h" />
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index c4cfefd77a..2cdad6e54d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -27,7 +27,7 @@
#include "Core/HW/ProcessorInterface.h"
#include "Core/PatchEngine.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
@@ -756,8 +756,6 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.downcountAmount += opinfo->numCycles;
js.fastmemLoadStore = nullptr;
js.fixupExceptionHandler = false;
- js.revertGprLoad = -1;
- js.revertFprLoad = -1;
if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
@@ -800,13 +798,17 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true);
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
- MOV(32, PPCSTATE(pc), Imm32(op.address));
- WriteExternalExceptionExit();
+ gpr.Flush();
+ fpr.Flush();
+
+ MOV(32, PPCSTATE(pc), Imm32(op.address));
+ WriteExternalExceptionExit();
+ }
SwitchToNearCode();
-
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
}
@@ -824,14 +826,19 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SwitchToFarCode();
SetJumpTarget(b1);
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
- // If a FPU exception occurs, the exception handler will read
- // from PC. Update PC with the latest value in case that happens.
- MOV(32, PPCSTATE(pc), Imm32(op.address));
- OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
- WriteExceptionExit();
+ gpr.Flush();
+ fpr.Flush();
+
+ // If a FPU exception occurs, the exception handler will read
+ // from PC. Update PC with the latest value in case that happens.
+ MOV(32, PPCSTATE(pc), Imm32(op.address));
+ OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
+ WriteExceptionExit();
+ }
SwitchToNearCode();
js.firstFPInstructionFound = true;
@@ -866,20 +873,8 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
- for (int reg : op.regsIn)
- {
- if (gpr.NumFreeRegisters() < 2)
- break;
- if (op.gprInReg[reg] && !gpr.R(reg).IsImm())
- gpr.BindToRegister(reg, true, false);
- }
- for (int reg : op.fregsIn)
- {
- if (fpr.NumFreeRegisters() < 2)
- break;
- if (op.fprInXmm[reg])
- fpr.BindToRegister(reg, true, false);
- }
+ gpr.PreloadRegisters(op.regsIn & op.gprInReg);
+ fpr.PreloadRegisters(op.fregsIn & op.fprInXmm);
CompileInstruction(op);
@@ -908,24 +903,25 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
}
- BitSet32 gprToFlush = BitSet32::AllTrue(32);
- BitSet32 fprToFlush = BitSet32::AllTrue(32);
- if (js.revertGprLoad >= 0)
- gprToFlush[js.revertGprLoad] = false;
- if (js.revertFprLoad >= 0)
- fprToFlush[js.revertFprLoad] = false;
- gpr.Flush(RegCache::FlushMode::MaintainState, gprToFlush);
- fpr.Flush(RegCache::FlushMode::MaintainState, fprToFlush);
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
+
+ gpr.Revert();
+ fpr.Revert();
+ gpr.Flush();
+ fpr.Flush();
+
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExceptionExit();
SwitchToNearCode();
}
+ gpr.Commit();
+ fpr.Commit();
+
// If we have a register that will never be used again, flush it.
- for (int j : ~op.gprInUse)
- gpr.StoreFromRegister(j);
- for (int j : ~op.fprInUse)
- fpr.StoreFromRegister(j);
+ gpr.Flush(~op.gprInUse);
+ fpr.Flush(~op.fprInUse);
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
@@ -969,15 +965,8 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const
BitSet32 Jit64::CallerSavedRegistersInUse() const
{
- BitSet32 result;
- for (size_t i = 0; i < RegCache::NUM_XREGS; i++)
- {
- if (!gpr.IsFreeX(i))
- result[i] = true;
- if (!fpr.IsFreeX(i))
- result[16 + i] = true;
- }
- return result & ABI_ALL_CALLER_SAVED;
+ BitSet32 in_use = gpr.RegistersInUse() | (fpr.RegistersInUse() << 16);
+ return in_use & ABI_ALL_CALLER_SAVED;
}
void Jit64::EnableBlockLink()
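The conversions above all follow one pattern: declare the constraints as RAII objects (`Use` yields an `RCOpArg` that may stay in memory or as an immediate; `Bind` yields an `RCX64Reg` guaranteed to be a host register), `Realize` them together so allocation sees every constraint at once, and let scope exit drop the locks. `Fork`/`Commit`/`Revert` replace `FlushMode::MaintainState`: a fork guard snapshots the cache so far-code paths can flush freely while the main path resumes unchanged. A condensed sketch of both idioms as they appear in the hunks (this lives inside a `Jit64` member function, so it is not standalone):

```cpp
// Main path: request, realize, emit; locks end with the scope.
RCOpArg Ra = gpr.Use(a, RCMode::Read);     // source: register, memory, or imm
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);  // destination: must be a host reg
RegCache::Realize(Ra, Rd);
MOV(32, Rd, Ra);

// Far path: fork, flush, exit; the guards restore the pre-fork state.
{
  RCForkGuard gpr_guard = gpr.Fork();
  RCForkGuard fpr_guard = fpr.Fork();
  gpr.Flush();
  fpr.Flush();
  WriteExceptionExit();
}
```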
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 068d42290c..35b356f32d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -21,10 +21,10 @@
#include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
-#include "Core/PowerPC/Jit64/FPURegCache.h"
-#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
@@ -88,10 +88,8 @@ public:
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
- void ComputeRC(const Gen::OpArg& arg, bool needs_test = true, bool needs_sext = true);
+ void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true);
- // Use to extract bytes from a register using the regcache. offset is in bytes.
- Gen::OpArg ExtractFromReg(int reg, int offset);
void AndWithMask(Gen::X64Reg reg, u32 mask);
bool CheckMergedBranch(u32 crf) const;
void DoMergedBranch();
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
deleted file mode 100644
index 3b65768db3..0000000000
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ /dev/null
@@ -1,324 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "Core/PowerPC/Jit64/JitRegCache.h"
-
-#include <algorithm>
-#include <cinttypes>
-#include <cmath>
-#include <limits>
-
-#include "Common/Assert.h"
-#include "Common/BitSet.h"
-#include "Common/CommonTypes.h"
-#include "Common/MsgHandler.h"
-#include "Common/x64Emitter.h"
-#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/PowerPC.h"
-
-using namespace Gen;
-using namespace PowerPC;
-
-RegCache::RegCache(Jit64& jit) : m_jit{jit}
-{
-}
-
-void RegCache::Start()
-{
- m_xregs.fill({});
- for (size_t i = 0; i < m_regs.size(); i++)
- {
- m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
- }
-}
-
-void RegCache::DiscardRegContentsIfCached(preg_t preg)
-{
- if (m_regs[preg].IsBound())
- {
- X64Reg xr = m_regs[preg].Location().GetSimpleReg();
- m_xregs[xr].SetFlushed();
- m_regs[preg].SetFlushed();
- }
-}
-
-void RegCache::SetEmitter(XEmitter* emitter)
-{
- m_emitter = emitter;
-}
-
-void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
-{
- ASSERT_MSG(
- DYNA_REC,
- std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
- "Someone forgot to unlock a X64 reg");
-
- for (unsigned int i : regsToFlush)
- {
- ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), "Someone forgot to unlock PPC reg %u (X64 reg %i).",
- i, RX(i));
-
- switch (m_regs[i].GetLocationType())
- {
- case PPCCachedReg::LocationType::Default:
- break;
- case PPCCachedReg::LocationType::SpeculativeImmediate:
- // We can have a cached value without a host register through speculative constants.
- // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
- // if PPCSTATE is modified externally (e.g. fallback to interpreter).
- m_regs[i].SetFlushed();
- break;
- case PPCCachedReg::LocationType::Bound:
- case PPCCachedReg::LocationType::Immediate:
- StoreFromRegister(i, mode);
- break;
- }
- }
-}
-
-void RegCache::FlushLockX(X64Reg reg)
-{
- FlushX(reg);
- LockX(reg);
-}
-
-void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2)
-{
- FlushX(reg1);
- FlushX(reg2);
- LockX(reg1);
- LockX(reg2);
-}
-
-bool RegCache::SanityCheck() const
-{
- for (size_t i = 0; i < m_regs.size(); i++)
- {
- switch (m_regs[i].GetLocationType())
- {
- case PPCCachedReg::LocationType::Default:
- case PPCCachedReg::LocationType::SpeculativeImmediate:
- case PPCCachedReg::LocationType::Immediate:
- break;
- case PPCCachedReg::LocationType::Bound:
- {
- if (m_regs[i].IsLocked())
- return false;
-
- Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
- if (m_xregs[xr].IsLocked())
- return false;
- if (m_xregs[xr].Contents() != i)
- return false;
- break;
- }
- }
- }
- return true;
-}
-
-void RegCache::KillImmediate(preg_t preg, bool doLoad, bool makeDirty)
-{
- switch (m_regs[preg].GetLocationType())
- {
- case PPCCachedReg::LocationType::Default:
- case PPCCachedReg::LocationType::SpeculativeImmediate:
- break;
- case PPCCachedReg::LocationType::Bound:
- if (makeDirty)
- m_xregs[RX(preg)].MakeDirty();
- break;
- case PPCCachedReg::LocationType::Immediate:
- BindToRegister(preg, doLoad, makeDirty);
- break;
- }
-}
-
-void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
-{
- if (!m_regs[i].IsBound())
- {
- X64Reg xr = GetFreeXReg();
-
- ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
- ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
-
- m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
-
- if (doLoad)
- {
- LoadRegister(i, xr);
- }
-
- ASSERT_MSG(DYNA_REC,
- std::none_of(m_regs.begin(), m_regs.end(),
- [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
- "Xreg %i already bound", xr);
-
- m_regs[i].SetBoundTo(xr);
- }
- else
- {
- // reg location must be simplereg; memory locations
- // and immediates are taken care of above.
- if (makeDirty)
- m_xregs[RX(i)].MakeDirty();
- }
-
- ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
-}
-
-void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
-{
- bool doStore = false;
-
- switch (m_regs[i].GetLocationType())
- {
- case PPCCachedReg::LocationType::Default:
- case PPCCachedReg::LocationType::SpeculativeImmediate:
- return;
- case PPCCachedReg::LocationType::Bound:
- {
- X64Reg xr = RX(i);
- doStore = m_xregs[xr].IsDirty();
- if (mode == FlushMode::All)
- m_xregs[xr].SetFlushed();
- break;
- }
- case PPCCachedReg::LocationType::Immediate:
- doStore = true;
- break;
- }
-
- if (doStore)
- StoreRegister(i, GetDefaultLocation(i));
- if (mode == FlushMode::All)
- m_regs[i].SetFlushed();
-}
-
-const OpArg& RegCache::R(preg_t preg) const
-{
- return m_regs[preg].Location();
-}
-
-X64Reg RegCache::RX(preg_t preg) const
-{
- ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
- return m_regs[preg].Location().GetSimpleReg();
-}
-
-void RegCache::UnlockAll()
-{
- for (auto& reg : m_regs)
- reg.Unlock();
-}
-
-void RegCache::UnlockAllX()
-{
- for (auto& xreg : m_xregs)
- xreg.Unlock();
-}
-
-bool RegCache::IsFreeX(size_t xreg) const
-{
- return m_xregs[xreg].IsFree();
-}
-
-X64Reg RegCache::GetFreeXReg()
-{
- size_t aCount;
- const X64Reg* aOrder = GetAllocationOrder(&aCount);
- for (size_t i = 0; i < aCount; i++)
- {
- X64Reg xr = aOrder[i];
- if (m_xregs[xr].IsFree())
- {
- return xr;
- }
- }
-
- // Okay, not found; run the register allocator heuristic and figure out which register we should
- // clobber.
-  float min_score = std::numeric_limits<float>::max();
- X64Reg best_xreg = INVALID_REG;
- size_t best_preg = 0;
- for (size_t i = 0; i < aCount; i++)
- {
- X64Reg xreg = (X64Reg)aOrder[i];
- preg_t preg = m_xregs[xreg].Contents();
- if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
- continue;
- float score = ScoreRegister(xreg);
- if (score < min_score)
- {
- min_score = score;
- best_xreg = xreg;
- best_preg = preg;
- }
- }
-
- if (best_xreg != INVALID_REG)
- {
- StoreFromRegister(best_preg);
- return best_xreg;
- }
-
- // Still no dice? Die!
- ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
- return INVALID_REG;
-}
-
-int RegCache::NumFreeRegisters() const
-{
- int count = 0;
- size_t aCount;
- const X64Reg* aOrder = GetAllocationOrder(&aCount);
- for (size_t i = 0; i < aCount; i++)
- if (m_xregs[aOrder[i]].IsFree())
- count++;
- return count;
-}
-
-void RegCache::FlushX(X64Reg reg)
-{
- ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
- ASSERT(!m_xregs[reg].IsLocked());
- if (!m_xregs[reg].IsFree())
- {
- StoreFromRegister(m_xregs[reg].Contents());
- }
-}
-
-// Estimate roughly how bad it would be to de-allocate this register. Higher score
-// means more bad.
-float RegCache::ScoreRegister(X64Reg xreg) const
-{
- preg_t preg = m_xregs[xreg].Contents();
- float score = 0;
-
- // If it's not dirty, we don't need a store to write it back to the register file, so
- // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
- // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
- // to the number of extra stores it causes.
- if (m_xregs[xreg].IsDirty())
- score += 2;
-
- // If the register isn't actually needed in a physical register for a later instruction,
- // writing it back to the register file isn't quite as bad.
- if (GetRegUtilization()[preg])
- {
- // Don't look too far ahead; we don't want to have quadratic compilation times for
- // enormous block sizes!
- // This actually improves register allocation a tiny bit; I'm not sure why.
- u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
- // Count how many other registers are going to be used before we need this one again.
- u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
- // Totally ad-hoc heuristic to bias based on how many other registers we'll need
- // before this one gets used again.
- score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
- }
-
- return score;
-}
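The eviction heuristic deleted here survives under the new RegCache/JitRegCache.cpp; this hunk relocates it rather than dropping it. To make the scoring concrete: a dirty register adds 2, and a register still wanted within the lookahead adds `1 + 2 * (5 - log2f(1 + regs_in_count))`, roughly 11 when it is needed immediately down to about 1 when 31 other registers get used first. A standalone check of those numbers:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
  // score += 1 + 2 * (5 - log2f(1 + regs_in_count)), per ScoreRegister above.
  for (int regs_in_count : {0, 1, 3, 7, 31})
  {
    const float bonus = 1 + 2 * (5 - std::log2(1.0f + regs_in_count));
    std::printf("regs_in_count=%2d -> +%.0f\n", regs_in_count, bonus);
  }
  return 0;
}
```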
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
deleted file mode 100644
index 9c59cab388..0000000000
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-
-#include "Common/Assert.h"
-#include "Common/x64Emitter.h"
-#include "Core/PowerPC/PPCAnalyst.h"
-
-class Jit64;
-
-using preg_t = size_t;
-
-class PPCCachedReg
-{
-public:
- enum class LocationType
- {
- /// Value is currently at its default location
- Default,
- /// Value is currently bound to a x64 register
- Bound,
- /// Value is known as an immediate and has not been written back to its default location
- Immediate,
- /// Value is known as an immediate and is already present at its default location
- SpeculativeImmediate,
- };
-
- PPCCachedReg() = default;
-
- explicit PPCCachedReg(Gen::OpArg default_location_)
- : default_location(default_location_), location(default_location_)
- {
- }
-
- const Gen::OpArg& Location() const { return location; }
-
- LocationType GetLocationType() const
- {
- if (!away)
- {
- if (location.IsImm())
- return LocationType::SpeculativeImmediate;
-
- ASSERT(location == default_location);
- return LocationType::Default;
- }
-
- ASSERT(location.IsImm() || location.IsSimpleReg());
- return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
- }
-
- bool IsAway() const { return away; }
- bool IsBound() const { return GetLocationType() == LocationType::Bound; }
-
- void SetBoundTo(Gen::X64Reg xreg)
- {
- away = true;
- location = Gen::R(xreg);
- }
-
- void SetFlushed()
- {
- away = false;
- location = default_location;
- }
-
- void SetToImm32(u32 imm32, bool dirty = true)
- {
- away |= dirty;
- location = Gen::Imm32(imm32);
- }
-
- bool IsLocked() const { return locked; }
- void Lock() { locked = true; }
- void Unlock() { locked = false; }
-
-private:
- Gen::OpArg default_location{};
- Gen::OpArg location{};
- bool away = false; // value not in source register
- bool locked = false;
-};
-
-class X64CachedReg
-{
-public:
- preg_t Contents() const { return ppcReg; }
-
- void SetBoundTo(preg_t ppcReg_, bool dirty_)
- {
- free = false;
- ppcReg = ppcReg_;
- dirty = dirty_;
- }
-
- void SetFlushed()
- {
-    ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
- free = true;
- dirty = false;
- }
-
- bool IsFree() const { return free && !locked; }
-
- bool IsDirty() const { return dirty; }
- void MakeDirty() { dirty = true; }
-
- bool IsLocked() const { return locked; }
- void Lock() { locked = true; }
- void Unlock() { locked = false; }
-
-private:
-  preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
- bool free = true;
- bool dirty = false;
- bool locked = false;
-};
-
-class RegCache
-{
-public:
- enum class FlushMode
- {
- All,
- MaintainState,
- };
-
- static constexpr size_t NUM_XREGS = 16;
-
- explicit RegCache(Jit64& jit);
- virtual ~RegCache() = default;
-
- virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
-
- void Start();
-
- void DiscardRegContentsIfCached(preg_t preg);
- void SetEmitter(Gen::XEmitter* emitter);
-
- void Flush(FlushMode mode = FlushMode::All, BitSet32 regsToFlush = BitSet32::AllTrue(32));
-
- void FlushLockX(Gen::X64Reg reg);
- void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2);
-
- bool SanityCheck() const;
- void KillImmediate(preg_t preg, bool doLoad, bool makeDirty);
-
- // TODO - instead of doload, use "read", "write"
- // read only will not set dirty flag
- void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
- void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::All);
-
- const Gen::OpArg& R(preg_t preg) const;
- Gen::X64Reg RX(preg_t preg) const;
-
- // Register locking.
-
- // these are powerpc reg indices
-  template <typename T>
- void Lock(T p)
- {
- m_regs[p].Lock();
- }
-  template <typename T, typename... Args>
- void Lock(T first, Args... args)
- {
- Lock(first);
- Lock(args...);
- }
-
- // these are x64 reg indices
-  template <typename T>
- void LockX(T x)
- {
- if (m_xregs[x].IsLocked())
- PanicAlert("RegCache: x %i already locked!", x);
- m_xregs[x].Lock();
- }
-  template <typename T, typename... Args>
- void LockX(T first, Args... args)
- {
- LockX(first);
- LockX(args...);
- }
-
-  template <typename T>
- void UnlockX(T x)
- {
- if (!m_xregs[x].IsLocked())
- PanicAlert("RegCache: x %i already unlocked!", x);
- m_xregs[x].Unlock();
- }
-  template <typename T, typename... Args>
- void UnlockX(T first, Args... args)
- {
- UnlockX(first);
- UnlockX(args...);
- }
-
- void UnlockAll();
- void UnlockAllX();
-
- bool IsFreeX(size_t xreg) const;
-
- Gen::X64Reg GetFreeXReg();
- int NumFreeRegisters() const;
-
-protected:
- virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
- virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
-
- virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;
-
- virtual BitSet32 GetRegUtilization() const = 0;
- virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
-
- void FlushX(Gen::X64Reg reg);
-
- float ScoreRegister(Gen::X64Reg xreg) const;
-
- Jit64& m_jit;
-  std::array<PPCCachedReg, 32> m_regs;
-  std::array<X64CachedReg, NUM_XREGS> m_xregs;
- Gen::XEmitter* m_emitter = nullptr;
-};
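The header above made callers pair `Lock`/`BindToRegister` with `UnlockAll` manually (hence the "Someone forgot to unlock" asserts in the .cpp). The mapping to the replacement API, as inferred from the conversions in the surrounding hunks:

```cpp
// Old (manual pairing)                 // New (scoped, from this diff)
// fpr.Lock(b, d);                      //   implicit: RC objects hold the locks
// OpArg src = fpr.R(b);                // RCOpArg Rb = fpr.Use(b, RCMode::Read);
// fpr.BindToRegister(d, false, true);  // RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
// fpr.BindToRegister(d, true, true);   // RCX64Reg Rd = fpr.Bind(d, RCMode::ReadWrite);
// gpr.FlushLockX(RSCRATCH_EXTRA);      // RCX64Reg sc = gpr.Scratch(RSCRATCH_EXTRA);
// fpr.UnlockAll(); gpr.UnlockAllX();   //   implicit: scope exit
```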
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 358e16cef9..7e0ded66c8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -8,7 +8,7 @@
#include "Core/CoreTiming.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -160,9 +160,13 @@ void Jit64::bcx(UGeckoInstruction inst)
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
- WriteExit(destination, inst.LK, js.compilerPC + 4);
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
+ gpr.Flush();
+ fpr.Flush();
+ WriteExit(destination, inst.LK, js.compilerPC + 4);
+ }
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);
@@ -215,10 +219,14 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
- WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
- // Would really like to continue the block here, but it ends. TODO.
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
+ gpr.Flush();
+ fpr.Flush();
+ WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
+ // Would really like to continue the block here, but it ends. TODO.
+ }
SetJumpTarget(b);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
@@ -269,9 +277,13 @@ void Jit64::bclrx(UGeckoInstruction inst)
if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
- WriteBLRExit();
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
+ gpr.Flush();
+ fpr.Flush();
+ WriteBLRExit();
+ }
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 15a3588ff7..fcf65ee9b8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -12,7 +12,7 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -76,7 +76,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
std::vector<FixupBranch> fixups;
for (u32 x : inputs)
{
- MOVDDUP(xmm, fpr.R(x));
+ RCOpArg Rx = fpr.Use(x, RCMode::Read);
+ RegCache::Realize(Rx);
+ MOVDDUP(xmm, Rx);
UCOMISD(xmm, R(xmm));
fixups.push_back(J_CC(CC_P));
}
@@ -102,8 +104,10 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
BLENDVPD(xmm, MConst(psGeneratedQNaN));
for (u32 x : inputs)
{
- avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
- BLENDVPD(xmm, fpr.R(x));
+ RCOpArg Rx = fpr.Use(x, RCMode::Read);
+ RegCache::Realize(Rx);
+ avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
+ BLENDVPD(xmm, Rx);
}
FixupBranch done = J(true);
SwitchToNearCode();
@@ -112,8 +116,8 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
else
{
// SSE2 fallback
- X64Reg tmp = fpr.GetFreeXReg();
- fpr.FlushLockX(tmp);
+ RCX64Reg tmp = fpr.Scratch();
+ RegCache::Realize(tmp);
MOVAPD(clobber, R(xmm));
CMPPD(clobber, R(clobber), CMP_UNORD);
MOVMSKPD(RSCRATCH, R(clobber));
@@ -125,20 +129,21 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
ANDNPD(clobber, R(xmm));
ANDPD(tmp, MConst(psGeneratedQNaN));
ORPD(tmp, R(clobber));
- MOVAPD(xmm, R(tmp));
+ MOVAPD(xmm, tmp);
for (u32 x : inputs)
{
- MOVAPD(clobber, fpr.R(x));
+ RCOpArg Rx = fpr.Use(x, RCMode::Read);
+ RegCache::Realize(Rx);
+ MOVAPD(clobber, Rx);
CMPPD(clobber, R(clobber), CMP_ORD);
MOVAPD(tmp, R(clobber));
- ANDNPD(clobber, fpr.R(x));
- ANDPD(xmm, R(tmp));
+ ANDNPD(clobber, Rx);
+ ANDPD(xmm, tmp);
ORPD(xmm, R(clobber));
}
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
- fpr.UnlockX(tmp);
}
}
if (xmm_out != xmm)
@@ -172,53 +177,55 @@ void Jit64::fp_arith(UGeckoInstruction inst)
bool round_input = single && !js.op->fprIsSingle[inst.FC];
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
- const auto fp_tri_op = [&](int d, int a, int b, bool reversible,
+ const auto fp_tri_op = [&](int op1, int op2, bool reversible,
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) {
- fpr.Lock(d, a, b);
- fpr.BindToRegister(d, d == a || d == b || !single);
- X64Reg dest = preserve_inputs ? XMM1 : fpr.RX(d);
+ RCX64Reg Rd = fpr.Bind(d, !single ? RCMode::ReadWrite : RCMode::Write);
+ RCOpArg Rop1 = fpr.Use(op1, RCMode::Read);
+ RCOpArg Rop2 = fpr.Use(op2, RCMode::Read);
+ RegCache::Realize(Rd, Rop1, Rop2);
+
+    X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
if (roundRHS)
{
- if (d == a && !preserve_inputs)
+ if (d == op1 && !preserve_inputs)
{
- Force25BitPrecision(XMM0, fpr.R(b), XMM1);
- (this->*sseOp)(fpr.RX(d), R(XMM0));
+ Force25BitPrecision(XMM0, Rop2, XMM1);
+ (this->*sseOp)(Rd, R(XMM0));
}
else
{
- Force25BitPrecision(dest, fpr.R(b), XMM0);
- (this->*sseOp)(dest, fpr.R(a));
+ Force25BitPrecision(dest, Rop2, XMM0);
+ (this->*sseOp)(dest, Rop1);
}
}
else
{
- avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), packed, reversible);
+ avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
}
- HandleNaNs(inst, fpr.RX(d), dest);
+ HandleNaNs(inst, Rd, dest);
if (single)
- ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
+ ForceSinglePrecision(Rd, Rd, packed, true);
+ SetFPRFIfNeeded(Rd);
};
switch (inst.SUBOP5)
{
case 18:
- fp_tri_op(d, a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
+ fp_tri_op(a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
packed ? &XEmitter::DIVPD : &XEmitter::DIVSD);
break;
case 20:
- fp_tri_op(d, a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
+ fp_tri_op(a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
packed ? &XEmitter::SUBPD : &XEmitter::SUBSD);
break;
case 21:
- fp_tri_op(d, a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
+ fp_tri_op(a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
packed ? &XEmitter::ADDPD : &XEmitter::ADDSD);
break;
case 25:
- fp_tri_op(d, a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
+ fp_tri_op(a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input);
break;
default:
@@ -241,17 +248,32 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
bool packed = inst.OPCD == 4 || (!cpu_info.bAtom && single && js.op->fprIsDuplicated[a] &&
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
- fpr.Lock(a, b, c, d);
+ // While we don't know if any games are actually affected (replays seem to work with all the usual
+ // suspects for desyncing), netplay and other applications need absolute perfect determinism, so
+ // be extra careful and don't use FMA, even if in theory it might be okay.
+ // Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
+ // to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
+ // instances on different computers giving identical results.
+ const bool use_fma = cpu_info.bFMA && !Core::WantsDeterminism();
+
+ // For use_fma == true:
+ // Statistics suggests b is a lot less likely to be unbound in practice, so
+ // if we have to pick one of a or b to bind, let's make it b.
+ RCOpArg Ra = fpr.Use(a, RCMode::Read);
+ RCOpArg Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read);
+ RCOpArg Rc = fpr.Use(c, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
+ RegCache::Realize(Ra, Rb, Rc, Rd);
switch (inst.SUBOP5)
{
case 14:
- MOVDDUP(XMM1, fpr.R(c));
+ MOVDDUP(XMM1, Rc);
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
break;
case 15:
- avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
+ avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
break;
@@ -260,38 +282,29 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
X64Reg tmp1 = special ? XMM0 : XMM1;
X64Reg tmp2 = special ? XMM1 : XMM0;
if (single && round_input)
- Force25BitPrecision(tmp1, fpr.R(c), tmp2);
+ Force25BitPrecision(tmp1, Rc, tmp2);
else
- MOVAPD(tmp1, fpr.R(c));
+ MOVAPD(tmp1, Rc);
break;
}
- // While we don't know if any games are actually affected (replays seem to work with all the usual
- // suspects for desyncing), netplay and other applications need absolute perfect determinism, so
- // be extra careful and don't use FMA, even if in theory it might be okay.
- // Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
- // to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
- // instances on different computers giving identical results.
- if (cpu_info.bFMA && !Core::WantsDeterminism())
+ if (use_fma)
{
- // Statistics suggests b is a lot less likely to be unbound in practice, so
- // if we have to pick one of a or b to bind, let's make it b.
- fpr.BindToRegister(b, true, false);
switch (inst.SUBOP5)
{
case 28: // msub
if (packed)
- VFMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
+ VFMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
- VFMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
+ VFMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
case 14: // madds0
case 15: // madds1
case 29: // madd
if (packed)
- VFMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
+ VFMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
- VFMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
+ VFMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
// PowerPC and x86 define NMADD/NMSUB differently
// x86: D = -A*C (+/-) B
@@ -299,15 +312,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
// so we have to swap them; the ADD/SUB here isn't a typo.
case 30: // nmsub
if (packed)
- VFNMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
+ VFNMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
- VFNMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
+ VFNMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
case 31: // nmadd
if (packed)
- VFNMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
+ VFNMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
- VFNMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
+ VFNMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
}
}
@@ -315,15 +328,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
{
// We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)), so handle it
// separately.
- MOVAPD(XMM1, fpr.R(b));
+ MOVAPD(XMM1, Rb);
if (packed)
{
- MULPD(XMM0, fpr.R(a));
+ MULPD(XMM0, Ra);
SUBPD(XMM1, R(XMM0));
}
else
{
- MULSD(XMM0, fpr.R(a));
+ MULSD(XMM0, Ra);
SUBSD(XMM1, R(XMM0));
}
}
@@ -331,36 +344,35 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
{
if (packed)
{
- MULPD(XMM1, fpr.R(a));
+ MULPD(XMM1, Ra);
if (inst.SUBOP5 == 28) // msub
- SUBPD(XMM1, fpr.R(b));
+ SUBPD(XMM1, Rb);
else //(n)madd(s[01])
- ADDPD(XMM1, fpr.R(b));
+ ADDPD(XMM1, Rb);
}
else
{
- MULSD(XMM1, fpr.R(a));
+ MULSD(XMM1, Ra);
if (inst.SUBOP5 == 28)
- SUBSD(XMM1, fpr.R(b));
+ SUBSD(XMM1, Rb);
else
- ADDSD(XMM1, fpr.R(b));
+ ADDSD(XMM1, Rb);
}
if (inst.SUBOP5 == 31) // nmadd
XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits));
}
- fpr.BindToRegister(d, !single);
+
if (single)
{
- HandleNaNs(inst, fpr.RX(d), XMM1);
- ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
+ HandleNaNs(inst, Rd, XMM1);
+ ForceSinglePrecision(Rd, Rd, packed, true);
}
else
{
HandleNaNs(inst, XMM1, XMM1);
- MOVSD(fpr.RX(d), R(XMM1));
+ MOVSD(Rd, R(XMM1));
}
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
+ SetFPRFIfNeeded(Rd);
}
void Jit64::fsign(UGeckoInstruction inst)
@@ -373,29 +385,28 @@ void Jit64::fsign(UGeckoInstruction inst)
int b = inst.FB;
bool packed = inst.OPCD == 4;
- fpr.Lock(b, d);
- OpArg src = fpr.R(b);
- fpr.BindToRegister(d, false);
+ RCOpArg src = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(src, Rd);
switch (inst.SUBOP10)
{
case 40: // neg
- avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
- MConst(packed ? psSignBits2 : psSignBits), packed);
+ avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
+ packed);
break;
case 136: // nabs
- avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src,
- MConst(packed ? psSignBits2 : psSignBits), packed);
+ avx_op(&XEmitter::VORPD, &XEmitter::ORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
+ packed);
break;
case 264: // abs
- avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src,
- MConst(packed ? psAbsMask2 : psAbsMask), packed);
+ avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, Rd, src, MConst(packed ? psAbsMask2 : psAbsMask),
+ packed);
break;
default:
PanicAlert("fsign bleh");
break;
}
- fpr.UnlockAll();
}
void Jit64::fselx(UGeckoInstruction inst)
@@ -411,35 +422,38 @@ void Jit64::fselx(UGeckoInstruction inst)
bool packed = inst.OPCD == 4; // ps_sel
- fpr.Lock(a, b, c, d);
+ RCOpArg Ra = fpr.Use(a, RCMode::Read);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCOpArg Rc = fpr.Use(c, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, packed ? RCMode::Write : RCMode::ReadWrite);
+ RegCache::Realize(Ra, Rb, Rc, Rd);
+
XORPD(XMM0, R(XMM0));
// This condition is very tricky; there's only one right way to handle both the case of
// negative/positive zero and NaN properly.
// (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE.
if (packed)
- CMPPD(XMM0, fpr.R(a), CMP_NLE);
+ CMPPD(XMM0, Ra, CMP_NLE);
else
- CMPSD(XMM0, fpr.R(a), CMP_NLE);
+ CMPSD(XMM0, Ra, CMP_NLE);
if (cpu_info.bSSE4_1)
{
- MOVAPD(XMM1, fpr.R(c));
- BLENDVPD(XMM1, fpr.R(b));
+ MOVAPD(XMM1, Rc);
+ BLENDVPD(XMM1, Rb);
}
else
{
MOVAPD(XMM1, R(XMM0));
- ANDPD(XMM0, fpr.R(b));
- ANDNPD(XMM1, fpr.R(c));
+ ANDPD(XMM0, Rb);
+ ANDNPD(XMM1, Rc);
ORPD(XMM1, R(XMM0));
}
- fpr.BindToRegister(d, !packed);
if (packed)
- MOVAPD(fpr.RX(d), R(XMM1));
+ MOVAPD(Rd, R(XMM1));
else
- MOVSD(fpr.RX(d), R(XMM1));
- fpr.UnlockAll();
+ MOVSD(Rd, R(XMM1));
}
void Jit64::fmrx(UGeckoInstruction inst)
@@ -454,26 +468,25 @@ void Jit64::fmrx(UGeckoInstruction inst)
if (d == b)
return;
- fpr.Lock(b, d);
-
- if (fpr.R(d).IsSimpleReg())
+ RCOpArg Rd = fpr.Use(d, RCMode::Write);
+ RegCache::Realize(Rd);
+ if (Rd.IsSimpleReg())
{
- // We don't need to load d, but if it is loaded, we need to mark it as dirty.
- fpr.BindToRegister(d);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RegCache::Realize(Rb);
// We have to use MOVLPD if b isn't loaded because "MOVSD reg, mem" sets the upper bits (64+)
// to zero and we don't want that.
- if (!fpr.R(b).IsSimpleReg())
- MOVLPD(fpr.RX(d), fpr.R(b));
+ if (!Rb.IsSimpleReg())
+ MOVLPD(Rd.GetSimpleReg(), Rb);
else
- MOVSD(fpr.R(d), fpr.RX(b));
+ MOVSD(Rd, Rb.GetSimpleReg());
}
else
{
- fpr.BindToRegister(b, true, false);
- MOVSD(fpr.R(d), fpr.RX(b));
+ RCOpArg Rb = fpr.Bind(b, RCMode::Read);
+ RegCache::Realize(Rb);
+ MOVSD(Rd, Rb.GetSimpleReg());
}
-
- fpr.UnlockAll();
}
void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
@@ -500,22 +513,22 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
output[3 - (next.CRBB & 3)] |= 1 << dst;
}
- fpr.Lock(a, b);
- fpr.BindToRegister(b, true, false);
+ RCOpArg Ra = upper ? fpr.Bind(a, RCMode::Read) : fpr.Use(a, RCMode::Read);
+ RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
+ RegCache::Realize(Ra, Rb);
if (fprf)
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
if (upper)
{
- fpr.BindToRegister(a, true, false);
- MOVHLPS(XMM0, fpr.RX(a));
- MOVHLPS(XMM1, fpr.RX(b));
+ MOVHLPS(XMM0, Ra.GetSimpleReg());
+ MOVHLPS(XMM1, Rb);
UCOMISD(XMM1, R(XMM0));
}
else
{
- UCOMISD(fpr.RX(b), fpr.R(a));
+ UCOMISD(Rb, Ra);
}
FixupBranch pNaN, pLesser, pGreater;
@@ -572,7 +585,6 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
}
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
- fpr.UnlockAll();
}
void Jit64::fcmpX(UGeckoInstruction inst)
@@ -591,8 +603,10 @@ void Jit64::fctiwx(UGeckoInstruction inst)
int d = inst.RD;
int b = inst.RB;
- fpr.Lock(d, b);
- fpr.BindToRegister(d);
+
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rb, Rd);
// Intel uses 0x80000000 as a generic error code while PowerPC uses clamping:
//
@@ -606,7 +620,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
// except for -0.0 where they are set to 0xfff80001 (TODO).
MOVAPD(XMM0, MConst(half_qnan_and_s32_max));
- MINSD(XMM0, fpr.R(b));
+ MINSD(XMM0, Rb);
switch (inst.SUBOP10)
{
// fctiwx
@@ -620,8 +634,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
break;
}
// d[64+] must not be modified
- MOVSD(fpr.R(d), XMM0);
- fpr.UnlockAll();
+ MOVSD(Rd, XMM0);
}
void Jit64::frspx(UGeckoInstruction inst)
@@ -633,12 +646,12 @@ void Jit64::frspx(UGeckoInstruction inst)
int d = inst.FD;
bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
- fpr.Lock(b, d);
- OpArg src = fpr.R(b);
- fpr.BindToRegister(d, false);
- ForceSinglePrecision(fpr.RX(d), src, packed, true);
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rb, Rd);
+
+ ForceSinglePrecision(Rd, Rb, packed, true);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::frsqrtex(UGeckoInstruction inst)
@@ -649,15 +662,15 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
- gpr.FlushLockX(RSCRATCH_EXTRA);
- fpr.Lock(b, d);
- fpr.BindToRegister(d);
- MOVAPD(XMM0, fpr.R(b));
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(scratch_guard, Rb, Rd);
+
+ MOVAPD(XMM0, Rb);
CALL(asm_routines.frsqrte);
- MOVSD(fpr.R(d), XMM0);
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
- gpr.UnlockAllX();
+ MOVSD(Rd, XMM0);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::fresx(UGeckoInstruction inst)
@@ -668,13 +681,13 @@ void Jit64::fresx(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
- gpr.FlushLockX(RSCRATCH_EXTRA);
- fpr.Lock(b, d);
- MOVAPD(XMM0, fpr.R(b));
- fpr.BindToRegister(d, false);
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(scratch_guard, Rb, Rd);
+
+ MOVAPD(XMM0, Rb);
CALL(asm_routines.fres);
- MOVDDUP(fpr.RX(d), R(XMM0));
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
- gpr.UnlockAllX();
+ MOVDDUP(Rd, R(XMM0));
+ SetFPRFIfNeeded(Rd);
}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index f91443996b..f132959dcb 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -13,7 +13,7 @@
#include "Common/MathUtil.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -141,9 +141,11 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
// branches, only EQ.
// The flags from any instruction that may set OF (such as ADD/SUB) can not be used for
// LT/GT either.
-void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext)
+void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext)
{
- ASSERT_MSG(DYNA_REC, arg.IsSimpleReg() || arg.IsImm(), "Invalid ComputeRC operand");
+ RCOpArg arg = gpr.Use(preg, RCMode::Read);
+ RegCache::Realize(arg);
+
if (arg.IsImm())
{
MOV(64, PPCSTATE(cr_val[0]), Imm32(arg.SImm32()));
@@ -157,17 +159,21 @@ void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext)
{
MOV(64, PPCSTATE(cr_val[0]), arg);
}
+
if (CheckMergedBranch(0))
{
if (arg.IsImm())
{
- DoMergedBranchImmediate(arg.SImm32());
+ s32 offset = arg.SImm32();
+ arg.Unlock();
+ DoMergedBranchImmediate(offset);
}
else
{
if (needs_test)
{
TEST(32, arg, arg);
+ arg.Unlock();
}
else
{
@@ -175,27 +181,14 @@ void Jit64::ComputeRC(const OpArg& arg, bool needs_test, bool needs_sext)
// better to flush it here so that we don't have to flush it on both sides of the branch.
// We don't want to do this if a test is needed though, because it would interrupt macro-op
// fusion.
- for (int j : ~js.op->gprInUse)
- gpr.StoreFromRegister(j);
+ arg.Unlock();
+ gpr.Flush(~js.op->gprInUse);
}
DoMergedBranchCondition();
}
}
}
-OpArg Jit64::ExtractFromReg(int reg, int offset)
-{
- OpArg src = gpr.R(reg);
- // store to load forwarding should handle this case efficiently
- if (offset)
- {
- gpr.StoreFromRegister(reg, RegCache::FlushMode::MaintainState);
- src = gpr.GetDefaultLocation(reg);
- src.AddMemOffset(offset);
- }
- return src;
-}
-
// we can't do this optimization in the emitter because MOVZX and AND have different effects on
// flags.
void Jit64::AndWithMask(X64Reg reg, u32 mask)
@@ -233,31 +226,28 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop,
void (XEmitter::*op)(int, const OpArg&, const OpArg&), bool Rc, bool carry)
{
bool needs_test = doop == Add;
- gpr.Lock(d, a);
// Be careful; addic treats r0 as r0, but addi treats r0 as zero.
if (a || binary || carry)
{
carry &= js.op->wantsCA;
- if (gpr.R(a).IsImm() && !carry)
+ if (gpr.IsImm(a) && !carry)
{
- gpr.SetImmediate32(d, doop(gpr.R(a).Imm32(), value));
- }
- else if (a == d)
- {
- gpr.BindToRegister(d, true);
- (this->*op)(32, gpr.R(d), Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
+ gpr.SetImmediate32(d, doop(gpr.Imm32(a), value));
}
else
{
- gpr.BindToRegister(d, false);
- if (doop == Add && gpr.R(a).IsSimpleReg() && !carry)
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+ if (doop == Add && Ra.IsSimpleReg() && !carry && d != a)
{
- LEA(32, gpr.RX(d), MDisp(gpr.RX(a), value));
+ LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value));
}
else
{
- MOV(32, gpr.R(d), gpr.R(a));
- (this->*op)(32, gpr.R(d), Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
+ if (d != a)
+ MOV(32, Rd, Ra);
+ (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
}
}
if (carry)
@@ -273,8 +263,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop,
ASSERT_MSG(DYNA_REC, 0, "WTF regimmop");
}
if (Rc)
- ComputeRC(gpr.R(d), needs_test, doop != And || (value & 0x80000000));
- gpr.UnlockAll();
+ ComputeRC(d, needs_test, doop != And || (value & 0x80000000));
}
void Jit64::reg_imm(UGeckoInstruction inst)
@@ -286,16 +275,16 @@ void Jit64::reg_imm(UGeckoInstruction inst)
{
case 14: // addi
// occasionally used as MOV - emulate, with immediate propagation
- if (gpr.R(a).IsImm() && d != a && a != 0)
+ if (gpr.IsImm(a) && d != a && a != 0)
{
- gpr.SetImmediate32(d, gpr.R(a).Imm32() + (u32)(s32)inst.SIMM_16);
+ gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16);
}
else if (inst.SIMM_16 == 0 && d != a && a != 0)
{
- gpr.Lock(a, d);
- gpr.BindToRegister(d, false, true);
- MOV(32, gpr.R(d), gpr.R(a));
- gpr.UnlockAll();
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+ MOV(32, Rd, Ra);
}
else
{
@@ -416,8 +405,8 @@ void Jit64::DoMergedBranchCondition()
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ASSERT(gpr.IsAllUnlocked());
+
FixupBranch pDontBranch;
if (test_bit & 8)
pDontBranch = J_CC(condition ? CC_GE : CC_L, true); // Test < 0, so jump over if >= 0.
@@ -428,10 +417,15 @@ void Jit64::DoMergedBranchCondition()
else // SO bit, do not branch (we don't emulate SO for cmp).
pDontBranch = J(true);
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
+ {
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
- DoMergedBranch();
+ gpr.Flush();
+ fpr.Flush();
+
+ DoMergedBranch();
+ }
SetJumpTarget(pDontBranch);
@@ -452,8 +446,8 @@ void Jit64::DoMergedBranchImmediate(s64 val)
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ASSERT(gpr.IsAllUnlocked());
+
bool branch;
if (test_bit & 8)
branch = condition ? val < 0 : val >= 0;
@@ -488,42 +482,39 @@ void Jit64::cmpXX(UGeckoInstruction inst)
u32 crf = inst.CRFD;
bool merge_branch = CheckMergedBranch(crf);
- OpArg comparand;
bool signedCompare;
- if (inst.OPCD == 31)
+ RCOpArg comparand;
+ switch (inst.OPCD)
{
- // cmp / cmpl
- gpr.Lock(a, b);
- comparand = gpr.R(b);
+ // cmp / cmpl
+ case 31:
signedCompare = (inst.SUBOP10 == 0);
- }
- else
- {
- gpr.Lock(a);
- if (inst.OPCD == 10)
- {
- // cmpli
- comparand = Imm32((u32)inst.UIMM);
- signedCompare = false;
- }
- else if (inst.OPCD == 11)
- {
- // cmpi
- comparand = Imm32((u32)(s32)(s16)inst.UIMM);
- signedCompare = true;
- }
- else
- {
- signedCompare = false; // silence compiler warning
- PanicAlert("cmpXX");
- }
+ comparand = signedCompare ? gpr.Use(b, RCMode::Read) : gpr.Bind(b, RCMode::Read);
+ RegCache::Realize(comparand);
+ break;
+
+ // cmpli
+ case 10:
+ signedCompare = false;
+ comparand = RCOpArg::Imm32((u32)inst.UIMM);
+ break;
+
+ // cmpi
+ case 11:
+ signedCompare = true;
+ comparand = RCOpArg::Imm32((u32)(s32)(s16)inst.UIMM);
+ break;
+
+ default:
+ signedCompare = false; // silence compiler warning
+ PanicAlert("cmpXX");
}
- if (gpr.R(a).IsImm() && comparand.IsImm())
+ if (gpr.IsImm(a) && comparand.IsImm())
{
// Both registers contain immediate values, so we can pre-compile the compare result
- s64 compareResult = signedCompare ? (s64)gpr.R(a).SImm32() - (s64)comparand.SImm32() :
- (u64)gpr.R(a).Imm32() - (u64)comparand.Imm32();
+ s64 compareResult = signedCompare ? (s64)gpr.SImm32(a) - (s64)comparand.SImm32() :
+ (u64)gpr.Imm32(a) - (u64)comparand.Imm32();
if (compareResult == (s32)compareResult)
{
MOV(64, PPCSTATE(cr_val[crf]), Imm32((u32)compareResult));
@@ -535,73 +526,83 @@ void Jit64::cmpXX(UGeckoInstruction inst)
}
if (merge_branch)
+ {
+ RegCache::Unlock(comparand);
DoMergedBranchImmediate(compareResult);
+ }
+
+ return;
+ }
+
+ if (!gpr.IsImm(a) && !signedCompare && comparand.IsImm() && comparand.Imm32() == 0)
+ {
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
+ RegCache::Realize(Ra);
+
+ MOV(64, PPCSTATE(cr_val[crf]), Ra);
+ if (merge_branch)
+ {
+ TEST(64, Ra, Ra);
+ RegCache::Unlock(comparand, Ra);
+ DoMergedBranchCondition();
+ }
+ return;
+ }
+
+ const X64Reg input = RSCRATCH;
+ if (gpr.IsImm(a))
+ {
+ if (signedCompare)
+ MOV(64, R(input), Imm32(gpr.SImm32(a)));
+ else
+ MOV(32, R(input), Imm32(gpr.Imm32(a)));
}
else
{
- X64Reg input = RSCRATCH;
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RegCache::Realize(Ra);
if (signedCompare)
- {
- if (gpr.R(a).IsImm())
- MOV(64, R(input), Imm32(gpr.R(a).SImm32()));
- else
- MOVSX(64, 32, input, gpr.R(a));
-
- if (!comparand.IsImm())
- {
- MOVSX(64, 32, RSCRATCH2, comparand);
- comparand = R(RSCRATCH2);
- }
- }
+ MOVSX(64, 32, input, Ra);
else
- {
- if (gpr.R(a).IsImm())
- {
- MOV(32, R(input), Imm32(gpr.R(a).Imm32()));
- }
- else if (comparand.IsImm() && !comparand.Imm32())
- {
- gpr.BindToRegister(a, true, false);
- input = gpr.RX(a);
- }
- else
- {
- MOVZX(64, 32, input, gpr.R(a));
- }
-
- if (comparand.IsImm())
- {
- // sign extension will ruin this, so store it in a register
- if (comparand.Imm32() & 0x80000000U)
- {
- MOV(32, R(RSCRATCH2), comparand);
- comparand = R(RSCRATCH2);
- }
- }
- else
- {
- gpr.BindToRegister(b, true, false);
- comparand = gpr.R(b);
- }
- }
- if (comparand.IsImm() && !comparand.Imm32())
- {
- MOV(64, PPCSTATE(cr_val[crf]), R(input));
- // Place the comparison next to the branch for macro-op fusion
- if (merge_branch)
- TEST(64, R(input), R(input));
- }
- else
- {
- SUB(64, R(input), comparand);
- MOV(64, PPCSTATE(cr_val[crf]), R(input));
- }
-
- if (merge_branch)
- DoMergedBranchCondition();
+ MOVZX(64, 32, input, Ra);
}
- gpr.UnlockAll();
+ if (comparand.IsImm())
+ {
+ // sign extension will ruin this, so store it in a register
+ if (!signedCompare && (comparand.Imm32() & 0x80000000U) != 0)
+ {
+ MOV(32, R(RSCRATCH2), comparand);
+ comparand = RCOpArg::R(RSCRATCH2);
+ }
+ }
+ else
+ {
+ if (signedCompare)
+ {
+ MOVSX(64, 32, RSCRATCH2, comparand);
+ comparand = RCOpArg::R(RSCRATCH2);
+ }
+ }
+
+ if (comparand.IsImm() && comparand.Imm32() == 0)
+ {
+ MOV(64, PPCSTATE(cr_val[crf]), R(input));
+ // Place the comparison next to the branch for macro-op fusion
+ if (merge_branch)
+ TEST(64, R(input), R(input));
+ }
+ else
+ {
+ SUB(64, R(input), comparand);
+ MOV(64, PPCSTATE(cr_val[crf]), R(input));
+ }
+
+ if (merge_branch)
+ {
+ RegCache::Unlock(comparand);
+ DoMergedBranchCondition();
+ }
}
void Jit64::boolX(UGeckoInstruction inst)
@@ -612,10 +613,10 @@ void Jit64::boolX(UGeckoInstruction inst)
bool needs_test = false;
DEBUG_ASSERT_MSG(DYNA_REC, inst.OPCD == 31, "Invalid boolX");
- if (gpr.R(s).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(s, b))
{
- const u32 rs_offset = gpr.R(s).Imm32();
- const u32 rb_offset = gpr.R(b).Imm32();
+ const u32 rs_offset = gpr.Imm32(s);
+ const u32 rb_offset = gpr.Imm32(b);
if (inst.SUBOP10 == 28) // andx
gpr.SetImmediate32(a, rs_offset & rb_offset);
@@ -640,33 +641,33 @@ void Jit64::boolX(UGeckoInstruction inst)
{
if (a != s)
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, false, true);
- MOV(32, gpr.R(a), gpr.R(s));
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Rs, Ra);
+ MOV(32, Ra, Rs);
}
else if (inst.Rc)
{
- gpr.BindToRegister(a, true, false);
+ gpr.Bind(a, RCMode::Read).Realize();
}
needs_test = true;
}
else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */))
{
- if (a != s)
+ if (a == s && !inst.Rc)
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, false, true);
- MOV(32, gpr.R(a), gpr.R(s));
- }
- else if (inst.Rc)
- {
- gpr.BindToRegister(a, true, true);
+ RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
+ RegCache::Realize(Ra);
+ NOT(32, Ra);
}
else
{
- gpr.KillImmediate(a, true, true);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Rs, Ra);
+ MOV(32, Ra, Rs);
+ NOT(32, Ra);
}
- NOT(32, gpr.R(a));
needs_test = true;
}
else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */))
@@ -684,70 +685,72 @@ void Jit64::boolX(UGeckoInstruction inst)
}
else if ((a == s) || (a == b))
{
- gpr.Lock(a, ((a == s) ? b : s));
- OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s));
- gpr.BindToRegister(a, true, true);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCOpArg operand = gpr.Use(a == s ? b : s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Rb, Rs, operand, Ra);
if (inst.SUBOP10 == 28) // andx
{
- AND(32, gpr.R(a), operand);
+ AND(32, Ra, operand);
}
else if (inst.SUBOP10 == 476) // nandx
{
- AND(32, gpr.R(a), operand);
- NOT(32, gpr.R(a));
+ AND(32, Ra, operand);
+ NOT(32, Ra);
needs_test = true;
}
else if (inst.SUBOP10 == 60) // andcx
{
- if (cpu_info.bBMI1 && gpr.R(b).IsSimpleReg() && !gpr.R(s).IsImm())
+ if (cpu_info.bBMI1 && Rb.IsSimpleReg() && !Rs.IsImm())
{
- ANDN(32, gpr.RX(a), gpr.RX(b), gpr.R(s));
+ ANDN(32, Ra, Rb.GetSimpleReg(), Rs);
}
else if (a == b)
{
- NOT(32, gpr.R(a));
- AND(32, gpr.R(a), operand);
+ NOT(32, Ra);
+ AND(32, Ra, operand);
}
else
{
MOV(32, R(RSCRATCH), operand);
NOT(32, R(RSCRATCH));
- AND(32, gpr.R(a), R(RSCRATCH));
+ AND(32, Ra, R(RSCRATCH));
}
}
else if (inst.SUBOP10 == 444) // orx
{
- OR(32, gpr.R(a), operand);
+ OR(32, Ra, operand);
}
else if (inst.SUBOP10 == 124) // norx
{
- OR(32, gpr.R(a), operand);
- NOT(32, gpr.R(a));
+ OR(32, Ra, operand);
+ NOT(32, Ra);
needs_test = true;
}
else if (inst.SUBOP10 == 412) // orcx
{
if (a == b)
{
- NOT(32, gpr.R(a));
- OR(32, gpr.R(a), operand);
+ NOT(32, Ra);
+ OR(32, Ra, operand);
}
else
{
MOV(32, R(RSCRATCH), operand);
NOT(32, R(RSCRATCH));
- OR(32, gpr.R(a), R(RSCRATCH));
+ OR(32, Ra, R(RSCRATCH));
}
}
else if (inst.SUBOP10 == 316) // xorx
{
- XOR(32, gpr.R(a), operand);
+ XOR(32, Ra, operand);
}
else if (inst.SUBOP10 == 284) // eqvx
{
- NOT(32, gpr.R(a));
- XOR(32, gpr.R(a), operand);
+ NOT(32, Ra);
+ XOR(32, Ra, operand);
}
else
{
@@ -756,62 +759,64 @@ void Jit64::boolX(UGeckoInstruction inst)
}
else
{
- gpr.Lock(a, s, b);
- gpr.BindToRegister(a, false, true);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Rb, Rs, Ra);
if (inst.SUBOP10 == 28) // andx
{
- MOV(32, gpr.R(a), gpr.R(s));
- AND(32, gpr.R(a), gpr.R(b));
+ MOV(32, Ra, Rs);
+ AND(32, Ra, Rb);
}
else if (inst.SUBOP10 == 476) // nandx
{
- MOV(32, gpr.R(a), gpr.R(s));
- AND(32, gpr.R(a), gpr.R(b));
- NOT(32, gpr.R(a));
+ MOV(32, Ra, Rs);
+ AND(32, Ra, Rb);
+ NOT(32, Ra);
needs_test = true;
}
else if (inst.SUBOP10 == 60) // andcx
{
- if (cpu_info.bBMI1 && gpr.R(b).IsSimpleReg() && !gpr.R(s).IsImm())
+ if (cpu_info.bBMI1 && Rb.IsSimpleReg() && !Rs.IsImm())
{
- ANDN(32, gpr.RX(a), gpr.RX(b), gpr.R(s));
+ ANDN(32, Ra, Rb.GetSimpleReg(), Rs);
}
else
{
- MOV(32, gpr.R(a), gpr.R(b));
- NOT(32, gpr.R(a));
- AND(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rb);
+ NOT(32, Ra);
+ AND(32, Ra, Rs);
}
}
else if (inst.SUBOP10 == 444) // orx
{
- MOV(32, gpr.R(a), gpr.R(s));
- OR(32, gpr.R(a), gpr.R(b));
+ MOV(32, Ra, Rs);
+ OR(32, Ra, Rb);
}
else if (inst.SUBOP10 == 124) // norx
{
- MOV(32, gpr.R(a), gpr.R(s));
- OR(32, gpr.R(a), gpr.R(b));
- NOT(32, gpr.R(a));
+ MOV(32, Ra, Rs);
+ OR(32, Ra, Rb);
+ NOT(32, Ra);
needs_test = true;
}
else if (inst.SUBOP10 == 412) // orcx
{
- MOV(32, gpr.R(a), gpr.R(b));
- NOT(32, gpr.R(a));
- OR(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rb);
+ NOT(32, Ra);
+ OR(32, Ra, Rs);
}
else if (inst.SUBOP10 == 316) // xorx
{
- MOV(32, gpr.R(a), gpr.R(s));
- XOR(32, gpr.R(a), gpr.R(b));
+ MOV(32, Ra, Rs);
+ XOR(32, Ra, Rb);
}
else if (inst.SUBOP10 == 284) // eqvx
{
- MOV(32, gpr.R(a), gpr.R(s));
- NOT(32, gpr.R(a));
- XOR(32, gpr.R(a), gpr.R(b));
+ MOV(32, Ra, Rs);
+ NOT(32, Ra);
+ XOR(32, Ra, Rb);
}
else
{
@@ -819,8 +824,7 @@ void Jit64::boolX(UGeckoInstruction inst)
}
}
if (inst.Rc)
- ComputeRC(gpr.R(a), needs_test);
- gpr.UnlockAll();
+ ComputeRC(a, needs_test);
}
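// Throughout this diff, explicit Lock()/BindToRegister()/UnlockAll()
// bookkeeping becomes a scoped transaction: callers describe the registers
// they need via Use()/Bind(), commit them with one Realize() call, and the
// locks drop automatically when the guards go out of scope. A minimal
// standalone sketch of that shape, with toy stand-ins (these are not the
// real RegCache/RCOpArg classes, just the RAII idea):

#include <cstdio>

enum class RCMode { Read, Write, ReadWrite };

class RegCache;

class RCOpArg
{
public:
  RCOpArg(RegCache* cache, int reg, RCMode mode) : m_cache(cache), m_reg(reg), m_mode(mode) {}
  ~RCOpArg();  // unlocks on scope exit, so an UnlockAll() can never be forgotten
  void Realize() { std::printf("realize r%d (mode %d)\n", m_reg, (int)m_mode); }

private:
  RegCache* m_cache;
  int m_reg;
  RCMode m_mode;
};

class RegCache
{
public:
  RCOpArg Use(int reg, RCMode mode) { return RCOpArg(this, reg, mode); }
  void Unlock(int reg) { std::printf("unlock r%d\n", reg); }

  template <typename... Ts>
  static void Realize(Ts&... args)
  {
    (args.Realize(), ...);  // commit the whole transaction at once
  }
};

RCOpArg::~RCOpArg() { m_cache->Unlock(m_reg); }

int main()
{
  RegCache gpr;
  {
    RCOpArg Rs = gpr.Use(3, RCMode::Read);
    RCOpArg Ra = gpr.Use(4, RCMode::ReadWrite);
    RegCache::Realize(Rs, Ra);
    // ... emit instructions that use Rs / Ra here ...
  }  // both registers unlock here, on every exit path
  return 0;
}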
void Jit64::extsXx(UGeckoInstruction inst)
@@ -830,19 +834,19 @@ void Jit64::extsXx(UGeckoInstruction inst)
int a = inst.RA, s = inst.RS;
int size = inst.SUBOP10 == 922 ? 16 : 8;
- if (gpr.R(s).IsImm())
+ if (gpr.IsImm(s))
{
- gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.R(s).Imm32() : (s8)gpr.R(s).Imm32()));
+ gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.Imm32(s) : (s8)gpr.Imm32(s)));
}
else
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s, true);
- MOVSX(32, size, gpr.RX(a), gpr.R(s));
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Rs, Ra);
+ MOVSX(32, size, Ra, Rs);
}
if (inst.Rc)
- ComputeRC(gpr.R(a));
- gpr.UnlockAll();
+ ComputeRC(a);
}
void Jit64::subfic(UGeckoInstruction inst)
@@ -850,40 +854,42 @@ void Jit64::subfic(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
- gpr.Lock(a, d);
- gpr.BindToRegister(d, a == d, true);
+
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
int imm = inst.SIMM_16;
if (d == a)
{
if (imm == 0)
{
// Flags act exactly like subtracting from 0
- NEG(32, gpr.R(d));
+ NEG(32, Rd);
// Output carry is inverted
FinalizeCarry(CC_NC);
}
else if (imm == -1)
{
- NOT(32, gpr.R(d));
+ NOT(32, Rd);
// CA is always set in this case
FinalizeCarry(true);
}
else
{
- NOT(32, gpr.R(d));
- ADD(32, gpr.R(d), Imm32(imm + 1));
+ NOT(32, Rd);
+ ADD(32, Rd, Imm32(imm + 1));
// Output carry is normal
FinalizeCarry(CC_C);
}
}
else
{
- MOV(32, gpr.R(d), Imm32(imm));
- SUB(32, gpr.R(d), gpr.R(a));
+ MOV(32, Rd, Imm32(imm));
+ SUB(32, Rd, Ra);
// Output carry is inverted
FinalizeCarry(CC_NC);
}
- gpr.UnlockAll();
// This instruction has no RC flag
}
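// The NOT/ADD form above leans on the two's-complement identity
// imm - r == ~r + (imm + 1), with the x86 carry-out of that ADD equal to
// PowerPC's CA (subfic defines CA as the carry of ~rA + SIMM + 1, and the
// code finalizes it with FinalizeCarry(CC_C)). A small standalone check of
// both value and carry (plain C++, no Dolphin types):

#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t r = 0x80001234;
  const int32_t imm = 0x7fff;  // any SIMM other than 0 and -1 (special-cased above)

  // Architectural reference: rD = ~rA + SIMM + 1, CA = carry out of that add
  const uint64_t wide = (uint64_t)(uint32_t)~r + (uint32_t)imm + 1u;
  const uint32_t ref = (uint32_t)wide;
  const bool ca_ref = (wide >> 32) != 0;

  // JIT sequence: NOT(32, Rd); ADD(32, Rd, Imm32(imm + 1)); CA = x86 CF
  const uint64_t sum = (uint64_t)(uint32_t)~r + (uint32_t)(imm + 1);
  const uint32_t jit = (uint32_t)sum;
  const bool ca_jit = (sum >> 32) != 0;

  std::printf("value %08x/%08x  CA %d/%d\n", jit, ref, ca_jit, ca_ref);
  return jit == ref && ca_jit == ca_ref ? 0 : 1;
}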
@@ -893,54 +899,60 @@ void Jit64::subfx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a) && gpr.IsImm(b))
{
- s32 i = gpr.R(b).SImm32(), j = gpr.R(a).SImm32();
+ s32 i = gpr.SImm32(b), j = gpr.SImm32(a);
gpr.SetImmediate32(d, i - j);
if (inst.OE)
GenerateConstantOverflow((s64)i - (s64)j);
}
else
{
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, (d == a || d == b), true);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rd);
+
if (d == b)
{
- SUB(32, gpr.R(d), gpr.R(a));
+ SUB(32, Rd, Ra);
}
else if (d == a)
{
- MOV(32, R(RSCRATCH), gpr.R(a));
- MOV(32, gpr.R(d), gpr.R(b));
- SUB(32, gpr.R(d), R(RSCRATCH));
+ MOV(32, R(RSCRATCH), Ra);
+ MOV(32, Rd, Rb);
+ SUB(32, Rd, R(RSCRATCH));
}
else
{
- MOV(32, gpr.R(d), gpr.R(b));
- SUB(32, gpr.R(d), gpr.R(a));
+ MOV(32, Rd, Rb);
+ SUB(32, Rd, Ra);
}
if (inst.OE)
GenerateOverflow();
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
+ ComputeRC(d);
}
void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow)
{
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
// simplest cases first
if (imm == 0)
{
- XOR(32, gpr.R(d), gpr.R(d));
+ XOR(32, Rd, Rd);
return;
}
if (imm == (u32)-1)
{
if (d != a)
- MOV(32, gpr.R(d), gpr.R(a));
- NEG(32, gpr.R(d));
+ MOV(32, Rd, Ra);
+ NEG(32, Rd);
return;
}
@@ -952,16 +964,16 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow)
{
u32 shift = IntLog2(imm);
// use LEA if it saves an op
- if (d != a && shift <= 3 && shift >= 1 && gpr.R(a).IsSimpleReg())
+ if (d != a && shift <= 3 && shift >= 1 && Ra.IsSimpleReg())
{
- LEA(32, gpr.RX(d), MScaled(gpr.RX(a), SCALE_1 << shift, 0));
+ LEA(32, Rd, MScaled(Ra.GetSimpleReg(), SCALE_1 << shift, 0));
}
else
{
if (d != a)
- MOV(32, gpr.R(d), gpr.R(a));
+ MOV(32, Rd, Ra);
if (shift)
- SHL(32, gpr.R(d), Imm8(shift));
+ SHL(32, Rd, Imm8(shift));
}
return;
}
@@ -971,18 +983,16 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow)
static constexpr std::array<u8, 3> lea_scales{{3, 5, 9}};
for (size_t i = 0; i < lea_scales.size(); i++)
{
- if (imm == lea_scales[i])
+ if (imm == lea_scales[i] && Ra.IsSimpleReg())
{
- if (d != a)
- gpr.BindToRegister(a, true, false);
- LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(a), SCALE_2 << i, 0));
+ LEA(32, Rd, MComplex(Ra.GetSimpleReg(), Ra.GetSimpleReg(), SCALE_2 << i, 0));
return;
}
}
}
// if we didn't find any better options
- IMUL(32, gpr.RX(d), gpr.R(a), Imm32(imm));
+ IMUL(32, Rd, Ra, Imm32(imm));
}
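// MultiplyImmediate prefers LEA because x86 address arithmetic folds small
// multiplies into one instruction: base + index*{2,4,8} covers x*3, x*5 and
// x*9, and a scaled index alone covers small powers of two. The same
// strength reduction in scalar form (hypothetical helper, shown only to
// mirror the instruction selection above):

#include <cassert>
#include <cstdint>

uint32_t MultiplyLowered(uint32_t x, uint32_t imm)
{
  if (imm == 0)
    return 0;                      // XOR(32, Rd, Rd)
  if ((imm & (imm - 1)) == 0)      // power of two: SHL, or LEA MScaled if the shift is 1..3
  {
    uint32_t shift = 0;
    while ((1u << shift) != imm)
      ++shift;
    return x << shift;
  }
  if (imm == 3 || imm == 5 || imm == 9)
    return x + x * (imm - 1);      // LEA(32, Rd, MComplex(Ra, Ra, SCALE_2 << i, 0))
  return x * imm;                  // fallback: IMUL(32, Rd, Ra, Imm32(imm))
}

int main()
{
  for (uint32_t x : {0u, 1u, 0xdeadbeefu})
    for (uint32_t imm : {0u, 1u, 2u, 3u, 4u, 5u, 7u, 8u, 9u})
      assert(MultiplyLowered(x, imm) == x * imm);
  return 0;
}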
void Jit64::mulli(UGeckoInstruction inst)
@@ -992,16 +1002,13 @@ void Jit64::mulli(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
u32 imm = inst.SIMM_16;
- if (gpr.R(a).IsImm())
+ if (gpr.IsImm(a))
{
- gpr.SetImmediate32(d, gpr.R(a).Imm32() * imm);
+ gpr.SetImmediate32(d, gpr.Imm32(a) * imm);
}
else
{
- gpr.Lock(a, d);
- gpr.BindToRegister(d, (d == a), true);
MultiplyImmediate(imm, a, d, false);
- gpr.UnlockAll();
}
}
@@ -1011,42 +1018,46 @@ void Jit64::mullwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a, b))
{
- s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32();
+ s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
gpr.SetImmediate32(d, i * j);
if (inst.OE)
GenerateConstantOverflow((s64)i * (s64)j);
}
+ else if (gpr.IsImm(a) || gpr.IsImm(b))
+ {
+ u32 imm = gpr.IsImm(a) ? gpr.Imm32(a) : gpr.Imm32(b);
+ int src = gpr.IsImm(a) ? b : a;
+ MultiplyImmediate(imm, src, d, inst.OE);
+ if (inst.OE)
+ GenerateOverflow();
+ }
else
{
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, (d == a || d == b), true);
- if (gpr.R(a).IsImm() || gpr.R(b).IsImm())
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rd);
+
+ if (d == a)
{
- u32 imm = gpr.R(a).IsImm() ? gpr.R(a).Imm32() : gpr.R(b).Imm32();
- int src = gpr.R(a).IsImm() ? b : a;
- MultiplyImmediate(imm, src, d, inst.OE);
- }
- else if (d == a)
- {
- IMUL(32, gpr.RX(d), gpr.R(b));
+ IMUL(32, Rd, Rb);
}
else if (d == b)
{
- IMUL(32, gpr.RX(d), gpr.R(a));
+ IMUL(32, Rd, Ra);
}
else
{
- MOV(32, gpr.R(d), gpr.R(b));
- IMUL(32, gpr.RX(d), gpr.R(a));
+ MOV(32, Rd, Rb);
+ IMUL(32, Rd, Ra);
}
if (inst.OE)
GenerateOverflow();
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
+ ComputeRC(d);
}
void Jit64::mulhwXx(UGeckoInstruction inst)
@@ -1056,41 +1067,45 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
bool sign = inst.SUBOP10 == 75;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a, b))
{
if (sign)
- gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.R(a).SImm32() * (s64)gpr.R(b).SImm32())) >> 32));
+ gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.SImm32(a) * (s64)gpr.SImm32(b))) >> 32));
else
- gpr.SetImmediate32(d, (u32)(((u64)gpr.R(a).Imm32() * (u64)gpr.R(b).Imm32()) >> 32));
+ gpr.SetImmediate32(d, (u32)(((u64)gpr.Imm32(a) * (u64)gpr.Imm32(b)) >> 32));
}
else if (sign)
{
- gpr.Lock(a, b, d);
- // no register choice
- gpr.FlushLockX(EDX, EAX);
- gpr.BindToRegister(d, d == a || d == b, true);
- MOV(32, R(EAX), gpr.R(a));
- gpr.KillImmediate(b, true, false);
- IMUL(32, gpr.R(b));
- MOV(32, gpr.R(d), R(EDX));
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.UseNoImm(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RCX64Reg eax = gpr.Scratch(EAX);
+ RCX64Reg edx = gpr.Scratch(EDX);
+ RegCache::Realize(Ra, Rb, Rd, eax, edx);
+
+ MOV(32, eax, Ra);
+ IMUL(32, Rb);
+ MOV(32, Rd, edx);
}
else
{
// Not faster for signed because we'd need two movsx.
- gpr.Lock(a, b, d);
// We need to bind everything to registers since the top 32 bits need to be zero.
int src = d == b ? a : b;
- gpr.BindToRegister(d, d == a || d == b, true);
- gpr.BindToRegister(src, true, false);
- if (d != a && d != b)
- MOV(32, gpr.R(d), gpr.R(a));
- IMUL(64, gpr.RX(d), gpr.R(src));
- SHR(64, gpr.R(d), Imm8(32));
+ int other = src == b ? a : b;
+
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RCX64Reg Rsrc = gpr.Bind(src, RCMode::Read);
+ RCOpArg Rother = gpr.Use(other, RCMode::Read);
+ RegCache::Realize(Rd, Rsrc, Rother);
+
+ if (other != d)
+ MOV(32, Rd, Rother);
+ IMUL(64, Rd, Rsrc);
+ SHR(64, Rd, Imm8(32));
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(d);
}
void Jit64::divwux(UGeckoInstruction inst)
@@ -1099,9 +1114,9 @@ void Jit64::divwux(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a, b))
{
- if (gpr.R(b).Imm32() == 0)
+ if (gpr.Imm32(b) == 0)
{
gpr.SetImmediate32(d, 0);
if (inst.OE)
@@ -1109,14 +1124,14 @@ void Jit64::divwux(UGeckoInstruction inst)
}
else
{
- gpr.SetImmediate32(d, gpr.R(a).Imm32() / gpr.R(b).Imm32());
+ gpr.SetImmediate32(d, gpr.Imm32(a) / gpr.Imm32(b));
if (inst.OE)
GenerateConstantOverflow(false);
}
}
- else if (gpr.R(b).IsImm())
+ else if (gpr.IsImm(b))
{
- u32 divisor = gpr.R(b).Imm32();
+ u32 divisor = gpr.Imm32(b);
if (divisor == 0)
{
gpr.SetImmediate32(d, 0);
@@ -1131,12 +1146,14 @@ void Jit64::divwux(UGeckoInstruction inst)
if (divisor == (u32)(1 << shift))
{
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, d == a, true);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
if (d != a)
- MOV(32, gpr.R(d), gpr.R(a));
+ MOV(32, Rd, Ra);
if (shift)
- SHR(32, gpr.R(d), Imm8(shift));
+ SHR(32, Rd, Imm8(shift));
}
else
{
@@ -1148,32 +1165,35 @@ void Jit64::divwux(UGeckoInstruction inst)
if (((u64)(magic + 1) * (max_quotient * divisor - 1)) >> (shift + 32) != max_quotient - 1)
{
// If failed, use slower round-down method
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, d == a, true);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
MOV(32, R(RSCRATCH), Imm32(magic));
if (d != a)
- MOV(32, gpr.R(d), gpr.R(a));
- IMUL(64, gpr.RX(d), R(RSCRATCH));
- ADD(64, gpr.R(d), R(RSCRATCH));
- SHR(64, gpr.R(d), Imm8(shift + 32));
+ MOV(32, Rd, Ra);
+ IMUL(64, Rd, R(RSCRATCH));
+ ADD(64, Rd, R(RSCRATCH));
+ SHR(64, Rd, Imm8(shift + 32));
}
else
{
// If success, use faster round-up method
- gpr.Lock(a, b, d);
- gpr.BindToRegister(a, true, false);
- gpr.BindToRegister(d, false, true);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
if (d == a)
{
MOV(32, R(RSCRATCH), Imm32(magic + 1));
- IMUL(64, gpr.RX(d), R(RSCRATCH));
+ IMUL(64, Rd, R(RSCRATCH));
}
else
{
- MOV(32, gpr.R(d), Imm32(magic + 1));
- IMUL(64, gpr.RX(d), gpr.R(a));
+ MOV(32, Rd, Imm32(magic + 1));
+ IMUL(64, Rd, Ra);
}
- SHR(64, gpr.R(d), Imm8(shift + 32));
+ SHR(64, Rd, Imm8(shift + 32));
}
}
if (inst.OE)
@@ -1182,24 +1202,27 @@ void Jit64::divwux(UGeckoInstruction inst)
}
else
{
- gpr.Lock(a, b, d);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rb = gpr.Bind(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
// no register choice (do we need to do this?)
- gpr.FlushLockX(EAX, EDX);
- gpr.BindToRegister(d, (d == a || d == b), true);
- MOV(32, R(EAX), gpr.R(a));
- XOR(32, R(EDX), R(EDX));
- gpr.KillImmediate(b, true, false);
- CMP_or_TEST(32, gpr.R(b), Imm32(0));
+ RCX64Reg eax = gpr.Scratch(EAX);
+ RCX64Reg edx = gpr.Scratch(EDX);
+ RegCache::Realize(Ra, Rb, Rd, eax, edx);
+
+ MOV(32, eax, Ra);
+ XOR(32, edx, edx);
+ TEST(32, Rb, Rb);
FixupBranch not_div_by_zero = J_CC(CC_NZ);
- MOV(32, gpr.R(d), R(EDX));
+ MOV(32, Rd, edx);
if (inst.OE)
{
GenerateConstantOverflow(true);
}
FixupBranch end = J();
SetJumpTarget(not_div_by_zero);
- DIV(32, gpr.R(b));
- MOV(32, gpr.R(d), R(EAX));
+ DIV(32, Rb);
+ MOV(32, Rd, eax);
if (inst.OE)
{
GenerateConstantOverflow(false);
@@ -1207,9 +1230,7 @@ void Jit64::divwux(UGeckoInstruction inst)
SetJumpTarget(end);
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(d);
}
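// Both constant-divisor paths above are fixed-point reciprocal multiplies.
// The magic/shift values come from code outside this hunk; under the usual
// construction (magic = floor(2^(32+shift) / divisor), shift = floor(log2
// divisor)) the round-up form is ((magic + 1) * x) >> (shift + 32) and the
// round-down fallback is (x * magic + magic) >> (shift + 32). A standalone
// spot check for one divisor (assumed construction, plain C++):

#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t divisor = 6;

  uint32_t shift = 0;
  while ((divisor >> shift) > 1)
    ++shift;
  const uint64_t magic = (uint64_t(1) << (32 + shift)) / divisor;

  const uint32_t samples[] = {0, 1, 5, 6, 7, 0x7fffffff, 0xfffffffa, 0xffffffff};
  for (uint32_t x : samples)
  {
    const uint32_t up = (uint32_t)(((magic + 1) * x) >> (shift + 32));                // faster path
    const uint32_t down = (uint32_t)(((uint64_t)x * magic + magic) >> (shift + 32));  // slower path
    std::printf("%u / %u: exact %u, round-up %u, round-down %u\n", x, divisor, x / divisor, up, down);
  }
  return 0;
}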
void Jit64::divwx(UGeckoInstruction inst)
@@ -1218,9 +1239,9 @@ void Jit64::divwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a, b))
{
- s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32();
+ s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
if (j == 0 || (i == (s32)0x80000000 && j == -1))
{
const u32 result = i < 0 ? 0xFFFFFFFF : 0x00000000;
@@ -1237,25 +1258,27 @@ void Jit64::divwx(UGeckoInstruction inst)
}
else
{
- gpr.Lock(a, b, d);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rb = gpr.Bind(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
// no register choice
- gpr.FlushLockX(EAX, EDX);
- gpr.BindToRegister(d, (d == a || d == b), true);
- MOV(32, R(EAX), gpr.R(a));
- gpr.BindToRegister(b, true, false);
+ RCX64Reg eax = gpr.Scratch(EAX);
+ RCX64Reg edx = gpr.Scratch(EDX);
+ RegCache::Realize(Ra, Rb, Rd, eax, edx);
- TEST(32, gpr.R(b), gpr.R(b));
+ MOV(32, eax, Ra);
+ TEST(32, Rb, Rb);
const FixupBranch overflow = J_CC(CC_E);
- CMP(32, R(EAX), Imm32(0x80000000));
+ CMP(32, eax, Imm32(0x80000000));
const FixupBranch normal_path1 = J_CC(CC_NE);
- CMP(32, gpr.R(b), Imm32(0xFFFFFFFF));
+ CMP(32, Rb, Imm32(0xFFFFFFFF));
const FixupBranch normal_path2 = J_CC(CC_NE);
SetJumpTarget(overflow);
- SAR(32, R(EAX), Imm8(31));
- MOV(32, gpr.R(d), R(EAX));
+ SAR(32, eax, Imm8(31));
+ MOV(32, Rd, eax);
if (inst.OE)
{
GenerateConstantOverflow(true);
@@ -1266,8 +1289,8 @@ void Jit64::divwx(UGeckoInstruction inst)
SetJumpTarget(normal_path2);
CDQ();
- IDIV(32, gpr.R(b));
- MOV(32, gpr.R(d), R(EAX));
+ IDIV(32, Rb);
+ MOV(32, Rd, eax);
if (inst.OE)
{
GenerateConstantOverflow(false);
@@ -1275,9 +1298,7 @@ void Jit64::divwx(UGeckoInstruction inst)
SetJumpTarget(done);
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(d);
}
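// The immediate path above spells out the two inputs x86 IDIV would fault
// on (division by zero, and INT_MIN / -1): the JIT instead produces
// 0xFFFFFFFF for a negative dividend and 0 otherwise, the same result the
// SAR(32, eax, Imm8(31)) overflow path below computes at runtime. The same
// semantics as a scalar reference:

#include <cassert>
#include <cstdint>

uint32_t DivwReference(int32_t i, int32_t j)
{
  if (j == 0 || (i == INT32_MIN && j == -1))
    return i < 0 ? 0xFFFFFFFFu : 0u;  // would raise #DE under x86 IDIV
  return (uint32_t)(i / j);           // both ISAs truncate toward zero
}

int main()
{
  assert(DivwReference(10, 3) == 3);
  assert(DivwReference(-10, 3) == (uint32_t)-3);
  assert(DivwReference(5, 0) == 0);
  assert(DivwReference(-5, 0) == 0xFFFFFFFFu);
  assert(DivwReference(INT32_MIN, -1) == 0xFFFFFFFFu);
  return 0;
}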
void Jit64::addx(UGeckoInstruction inst)
@@ -1286,40 +1307,39 @@ void Jit64::addx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
- if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
+ if (gpr.IsImm(a, b))
{
- s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32();
+ s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
gpr.SetImmediate32(d, i + j);
if (inst.OE)
GenerateConstantOverflow((s64)i + (s64)j);
}
- else if ((d == a) || (d == b))
- {
- int operand = ((d == a) ? b : a);
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, true);
- ADD(32, gpr.R(d), gpr.R(operand));
- if (inst.OE)
- GenerateOverflow();
- }
- else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && !inst.OE)
- {
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, false);
- LEA(32, gpr.RX(d), MRegSum(gpr.RX(a), gpr.RX(b)));
- }
else
{
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, false);
- MOV(32, gpr.R(d), gpr.R(a));
- ADD(32, gpr.R(d), gpr.R(b));
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rd);
+
+ if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE)
+ {
+ LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg()));
+ }
+ else if (d == b)
+ {
+ ADD(32, Rd, Ra);
+ }
+ else
+ {
+ if (d != a)
+ MOV(32, Rd, Ra);
+ ADD(32, Rd, Rb);
+ }
if (inst.OE)
GenerateOverflow();
}
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
+ ComputeRC(d);
}
void Jit64::arithXex(UGeckoInstruction inst)
@@ -1334,8 +1354,6 @@ void Jit64::arithXex(UGeckoInstruction inst)
int d = inst.RD;
bool same_input_sub = !add && regsource && a == b;
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
if (!js.carryFlagSet)
JitGetAndClearCAOV(inst.OE);
else
@@ -1345,45 +1363,56 @@ void Jit64::arithXex(UGeckoInstruction inst)
// Special case: subfe A, B, B is a common compiler idiom
if (same_input_sub)
{
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
+
// Convert carry to borrow
if (!js.carryFlagInverted)
CMC();
- SBB(32, gpr.R(d), gpr.R(d));
+ SBB(32, Rd, Rd);
invertedCarry = true;
}
else if (!add && regsource && d == b)
{
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::ReadWrite);
+ RegCache::Realize(Ra, Rd);
+
if (!js.carryFlagInverted)
CMC();
- SBB(32, gpr.R(d), gpr.R(a));
+ SBB(32, Rd, Ra);
invertedCarry = true;
}
else
{
- OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RCOpArg source =
+ regsource ? gpr.Use(d == b ? a : b, RCMode::Read) : RCOpArg::Imm32(mex ? 0xFFFFFFFF : 0);
+ RegCache::Realize(Ra, Rb, Rd, source);
+
if (d != a && d != b)
- MOV(32, gpr.R(d), gpr.R(a));
+ MOV(32, Rd, Ra);
if (!add)
- NOT(32, gpr.R(d));
+ NOT(32, Rd);
// if the source is an immediate, we can invert carry by going from add -> sub and doing src =
// -1 - src
if (js.carryFlagInverted && source.IsImm())
{
- source = Imm32(-1 - source.SImm32());
- SBB(32, gpr.R(d), source);
+ SBB(32, Rd, Imm32(-1 - source.SImm32()));
invertedCarry = true;
}
else
{
if (js.carryFlagInverted)
CMC();
- ADC(32, gpr.R(d), source);
+ ADC(32, Rd, source);
}
}
FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
+ ComputeRC(d);
}
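// subfe rD, rB, rB evaluates to ~rB + rB + CA = CA - 1, i.e. 0 when CA is
// set and 0xFFFFFFFF when clear, which is why the same_input_sub path above
// never reads the register and emits a single SBB Rd, Rd on the borrow
// (the inverse of CA, hence the CMC). A scalar check of the identity:

#include <cassert>
#include <cstdint>

int main()
{
  for (uint32_t b : {0u, 1u, 0x80000000u, 0xffffffffu})
    for (uint32_t ca : {0u, 1u})
    {
      const uint32_t subfe = ~b + b + ca;        // architectural definition
      const uint32_t sbb = 0u - (ca ? 0u : 1u);  // SBB Rd, Rd with borrow = !CA
      assert(subfe == sbb);
    }
  return 0;
}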
void Jit64::arithcx(UGeckoInstruction inst)
@@ -1392,37 +1421,41 @@ void Jit64::arithcx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA, b = inst.RB, d = inst.RD;
- gpr.Lock(a, b, d);
- gpr.BindToRegister(d, d == a || d == b, true);
- if (d == a && d != b)
{
- if (add)
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rd);
+
+ if (d == a && d != b)
{
- ADD(32, gpr.R(d), gpr.R(b));
+ if (add)
+ {
+ ADD(32, Rd, Rb);
+ }
+ else
+ {
+ // special case, because sub isn't reversible
+ MOV(32, R(RSCRATCH), Ra);
+ MOV(32, Rd, Rb);
+ SUB(32, Rd, R(RSCRATCH));
+ }
}
else
{
- // special case, because sub isn't reversible
- MOV(32, R(RSCRATCH), gpr.R(a));
- MOV(32, gpr.R(d), gpr.R(b));
- SUB(32, gpr.R(d), R(RSCRATCH));
+ if (d != b)
+ MOV(32, Rd, Rb);
+ if (add)
+ ADD(32, Rd, Ra);
+ else
+ SUB(32, Rd, Ra);
}
}
- else
- {
- if (d != b)
- MOV(32, gpr.R(d), gpr.R(b));
- if (add)
- ADD(32, gpr.R(d), gpr.R(a));
- else
- SUB(32, gpr.R(d), gpr.R(a));
- }
FinalizeCarryOverflow(inst.OE, !add);
if (inst.Rc)
- ComputeRC(gpr.R(d));
- gpr.UnlockAll();
+ ComputeRC(d);
}
void Jit64::rlwinmx(UGeckoInstruction inst)
@@ -1432,15 +1465,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
- if (gpr.R(s).IsImm())
+ if (gpr.IsImm(s))
{
- u32 result = gpr.R(s).Imm32();
+ u32 result = gpr.Imm32(s);
if (inst.SH != 0)
result = Common::RotateLeft(result, inst.SH);
result &= MakeRotationMask(inst.MB, inst.ME);
gpr.SetImmediate32(a, result);
if (inst.Rc)
- ComputeRC(gpr.R(a));
+ ComputeRC(a);
}
else
{
@@ -1455,59 +1488,64 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
bool needs_sext = true;
int mask_size = inst.ME - inst.MB + 1;
- gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s);
- if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3)
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Rs, Ra);
+
+ if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3)
{
- LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0));
+ LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0));
}
// common optimized case: byte/word extract
else if (simple_mask && !(inst.SH & (mask_size - 1)))
{
- MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0));
+ MOVZX(32, mask_size, Ra, Rs.ExtractWithByteOffset(inst.SH ? (32 - inst.SH) >> 3 : 0));
needs_sext = false;
}
// another optimized special case: byte/word extract plus shift
else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift &&
((mask >> inst.SH) == 0xff || (mask >> inst.SH) == 0xffff))
{
- MOVZX(32, mask_size, gpr.RX(a), gpr.R(s));
- SHL(32, gpr.R(a), Imm8(inst.SH));
+ MOVZX(32, mask_size, Ra, Rs);
+ SHL(32, Ra, Imm8(inst.SH));
needs_sext = inst.SH + mask_size >= 32;
}
else
{
if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rs);
if (left_shift)
{
- SHL(32, gpr.R(a), Imm8(inst.SH));
+ SHL(32, Ra, Imm8(inst.SH));
}
else if (right_shift)
{
- SHR(32, gpr.R(a), Imm8(inst.MB));
+ SHR(32, Ra, Imm8(inst.MB));
needs_sext = false;
}
else
{
if (inst.SH != 0)
- ROL(32, gpr.R(a), Imm8(inst.SH));
+ ROL(32, Ra, Imm8(inst.SH));
if (!(inst.MB == 0 && inst.ME == 31))
{
// we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0))
- AND(32, gpr.R(a), Imm32(mask));
+ AND(32, Ra, Imm32(mask));
else
- AndWithMask(gpr.RX(a), mask);
+ AndWithMask(Ra, mask);
needs_sext = inst.MB == 0;
needs_test = false;
}
}
}
+
+ Rs.Unlock();
+ Ra.Unlock();
+
if (inst.Rc)
- ComputeRC(gpr.R(a), needs_test, needs_sext);
- gpr.UnlockAll();
+ ComputeRC(a, needs_test, needs_sext);
}
}
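// The MOVZX fast path above works because rlwinm with a byte- or word-wide
// contiguous mask, and a rotation that lines up on that mask, is just an
// unsigned field extract from the source register. Reference check for the
// byte case (SH = 8, MB = 24, ME = 31):

#include <cassert>
#include <cstdint>

uint32_t RotL(uint32_t x, unsigned n)
{
  return n ? (x << n) | (x >> (32 - n)) : x;
}

int main()
{
  const uint32_t x = 0x12345678;
  const unsigned SH = 8;
  const uint32_t mask = 0x000000ff;  // MakeRotationMask(24, 31)

  const uint32_t reference = RotL(x, SH) & mask;

  // JIT fast path: MOVZX from byte offset (32 - SH) / 8 of the source
  const unsigned byte_offset = (32 - SH) / 8;
  const uint32_t movzx = (x >> (8 * byte_offset)) & 0xff;

  assert(reference == movzx && movzx == 0x12);
  return 0;
}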
@@ -1518,17 +1556,16 @@ void Jit64::rlwimix(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
- if (gpr.R(a).IsImm() && gpr.R(s).IsImm())
+ if (gpr.IsImm(a, s))
{
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
- gpr.SetImmediate32(a, (gpr.R(a).Imm32() & ~mask) |
- (Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask));
+ gpr.SetImmediate32(a,
+ (gpr.Imm32(a) & ~mask) | (Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask));
if (inst.Rc)
- ComputeRC(gpr.R(a));
+ ComputeRC(a);
}
else
{
- gpr.Lock(a, s);
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
bool needs_test = false;
if (mask == 0 || (a == s && inst.SH == 0))
@@ -1537,79 +1574,90 @@ void Jit64::rlwimix(UGeckoInstruction inst)
}
else if (mask == 0xFFFFFFFF)
{
- gpr.BindToRegister(a, a == s, true);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Rs, Ra);
if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rs);
if (inst.SH)
- ROL(32, gpr.R(a), Imm8(inst.SH));
+ ROL(32, Ra, Imm8(inst.SH));
needs_test = true;
}
- else if (gpr.R(s).IsImm())
+ else if (gpr.IsImm(s))
{
- gpr.BindToRegister(a, true, true);
- AndWithMask(gpr.RX(a), ~mask);
- OR(32, gpr.R(a), Imm32(Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask));
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Ra);
+ AndWithMask(Ra, ~mask);
+ OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask));
}
else if (inst.SH)
{
bool isLeftShift = mask == 0U - (1U << inst.SH);
bool isRightShift = mask == (1U << inst.SH) - 1;
- if (gpr.R(a).IsImm())
+ if (gpr.IsImm(a))
{
- u32 maskA = gpr.R(a).Imm32() & ~mask;
- gpr.BindToRegister(a, false, true);
- MOV(32, gpr.R(a), gpr.R(s));
+ u32 maskA = gpr.Imm32(a) & ~mask;
+
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Rs, Ra);
+
+ MOV(32, Ra, Rs);
if (isLeftShift)
{
- SHL(32, gpr.R(a), Imm8(inst.SH));
+ SHL(32, Ra, Imm8(inst.SH));
}
else if (isRightShift)
{
- SHR(32, gpr.R(a), Imm8(32 - inst.SH));
+ SHR(32, Ra, Imm8(32 - inst.SH));
}
else
{
- ROL(32, gpr.R(a), Imm8(inst.SH));
- AND(32, gpr.R(a), Imm32(mask));
+ ROL(32, Ra, Imm8(inst.SH));
+ AND(32, Ra, Imm32(mask));
}
- OR(32, gpr.R(a), Imm32(maskA));
+ OR(32, Ra, Imm32(maskA));
}
else
{
// TODO: common cases of this might be faster with pinsrb or abuse of AH
- gpr.BindToRegister(a, true, true);
- MOV(32, R(RSCRATCH), gpr.R(s));
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Rs, Ra);
+
+ MOV(32, R(RSCRATCH), Rs);
if (isLeftShift)
{
SHL(32, R(RSCRATCH), Imm8(inst.SH));
- AndWithMask(gpr.RX(a), ~mask);
- OR(32, gpr.R(a), R(RSCRATCH));
+ AndWithMask(Ra, ~mask);
+ OR(32, Ra, R(RSCRATCH));
}
else if (isRightShift)
{
SHR(32, R(RSCRATCH), Imm8(32 - inst.SH));
- AndWithMask(gpr.RX(a), ~mask);
- OR(32, gpr.R(a), R(RSCRATCH));
+ AndWithMask(Ra, ~mask);
+ OR(32, Ra, R(RSCRATCH));
}
else
{
ROL(32, R(RSCRATCH), Imm8(inst.SH));
- XOR(32, R(RSCRATCH), gpr.R(a));
+ XOR(32, R(RSCRATCH), Ra);
AndWithMask(RSCRATCH, mask);
- XOR(32, gpr.R(a), R(RSCRATCH));
+ XOR(32, Ra, R(RSCRATCH));
}
}
}
else
{
- gpr.BindToRegister(a, true, true);
- XOR(32, gpr.R(a), gpr.R(s));
- AndWithMask(gpr.RX(a), ~mask);
- XOR(32, gpr.R(a), gpr.R(s));
+ RCX64Reg Rs = gpr.Bind(s, RCMode::Read);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
+ RegCache::Realize(Rs, Ra);
+ XOR(32, Ra, Rs);
+ AndWithMask(Ra, ~mask);
+ XOR(32, Ra, Rs);
}
if (inst.Rc)
- ComputeRC(gpr.R(a), needs_test);
- gpr.UnlockAll();
+ ComputeRC(a, needs_test);
}
}
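// The closing XOR/AND/XOR triple is the classic branch-free bit merge:
// ((a ^ s) & ~mask) ^ s selects s where mask is set and a where it is
// clear, i.e. rlwimi with SH = 0, without needing a scratch register.
// A quick identity check:

#include <cassert>
#include <cstdint>

int main()
{
  const uint32_t a = 0xA5A5A5A5, s = 0x12345678, mask = 0x00FFFF00;

  const uint32_t reference = (a & ~mask) | (s & mask);

  uint32_t r = a;
  r ^= s;      // XOR(32, Ra, Rs)
  r &= ~mask;  // AndWithMask(Ra, ~mask)
  r ^= s;      // XOR(32, Ra, Rs)
  assert(r == reference);
  return 0;
}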
@@ -1620,32 +1668,32 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
- if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
+ if (gpr.IsImm(b, s))
{
- gpr.SetImmediate32(a, Common::RotateLeft(gpr.R(s).Imm32(), gpr.R(b).Imm32() & 0x1F) & mask);
+ gpr.SetImmediate32(a, Common::RotateLeft(gpr.Imm32(s), gpr.Imm32(b) & 0x1F) & mask);
}
else
{
- // no register choice
- gpr.FlushLockX(ECX);
- gpr.Lock(a, b, s);
- MOV(32, R(ECX), gpr.R(b));
- gpr.BindToRegister(a, (a == s), true);
+ RCX64Reg ecx = gpr.Scratch(ECX); // no register choice
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(ecx, Ra, Rb, Rs);
+
+ MOV(32, ecx, Rb);
if (a != s)
{
- MOV(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rs);
}
- ROL(32, gpr.R(a), R(ECX));
+ ROL(32, Ra, ecx);
// we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0))
- AND(32, gpr.R(a), Imm32(mask));
+ AND(32, Ra, Imm32(mask));
else
- AndWithMask(gpr.RX(a), mask);
+ AndWithMask(Ra, mask);
}
if (inst.Rc)
- ComputeRC(gpr.R(a), false);
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(a, false);
}
void Jit64::negx(UGeckoInstruction inst)
@@ -1655,25 +1703,26 @@ void Jit64::negx(UGeckoInstruction inst)
int a = inst.RA;
int d = inst.RD;
- if (gpr.R(a).IsImm())
+ if (gpr.IsImm(a))
{
- gpr.SetImmediate32(d, ~(gpr.R(a).Imm32()) + 1);
+ gpr.SetImmediate32(d, ~(gpr.Imm32(a)) + 1);
if (inst.OE)
- GenerateConstantOverflow(gpr.R(d).Imm32() == 0x80000000);
+ GenerateConstantOverflow(gpr.Imm32(d) == 0x80000000);
}
else
{
- gpr.Lock(a, d);
- gpr.BindToRegister(d, a == d, true);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
if (a != d)
- MOV(32, gpr.R(d), gpr.R(a));
- NEG(32, gpr.R(d));
+ MOV(32, Rd, Ra);
+ NEG(32, Rd);
if (inst.OE)
GenerateOverflow();
}
if (inst.Rc)
- ComputeRC(gpr.R(d), false);
- gpr.UnlockAll();
+ ComputeRC(d, false);
}
void Jit64::srwx(UGeckoInstruction inst)
@@ -1684,29 +1733,27 @@ void Jit64::srwx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
- if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
+ if (gpr.IsImm(b, s))
{
- u32 amount = gpr.R(b).Imm32();
- gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.R(s).Imm32() >> (amount & 0x1f)));
+ u32 amount = gpr.Imm32(b);
+ gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.Imm32(s) >> (amount & 0x1f)));
}
else
{
- // no register choice
- gpr.FlushLockX(ECX);
- gpr.Lock(a, b, s);
- MOV(32, R(ECX), gpr.R(b));
- gpr.BindToRegister(a, a == s, true);
+ RCX64Reg ecx = gpr.Scratch(ECX); // no register choice
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(ecx, Ra, Rb, Rs);
+
+ MOV(32, ecx, Rb);
if (a != s)
- {
- MOV(32, gpr.R(a), gpr.R(s));
- }
- SHR(64, gpr.R(a), R(ECX));
+ MOV(32, Ra, Rs);
+ SHR(64, Ra, ecx);
}
// Shift of 0 doesn't update flags, so we need to test just in case
if (inst.Rc)
- ComputeRC(gpr.R(a));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(a);
}
void Jit64::slwx(UGeckoInstruction inst)
@@ -1717,34 +1764,35 @@ void Jit64::slwx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
- if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
+ if (gpr.IsImm(b, s))
{
- u32 amount = gpr.R(b).Imm32();
- gpr.SetImmediate32(a, (amount & 0x20) ? 0 : gpr.R(s).Imm32() << (amount & 0x1f));
+ u32 amount = gpr.Imm32(b);
+ gpr.SetImmediate32(a, (amount & 0x20) ? 0 : gpr.Imm32(s) << (amount & 0x1f));
if (inst.Rc)
- ComputeRC(gpr.R(a));
+ ComputeRC(a);
}
else
{
- // no register choice
- gpr.FlushLockX(ECX);
- gpr.Lock(a, b, s);
- MOV(32, R(ECX), gpr.R(b));
- gpr.BindToRegister(a, a == s, true);
+ RCX64Reg ecx = gpr.Scratch(ECX); // no register choice
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(ecx, Ra, Rb, Rs);
+
+ MOV(32, ecx, Rb);
if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
- SHL(64, gpr.R(a), R(ECX));
+ MOV(32, Ra, Rs);
+ SHL(64, Ra, ecx);
if (inst.Rc)
{
- AND(32, gpr.R(a), gpr.R(a));
- ComputeRC(gpr.R(a), false);
+ AND(32, Ra, Ra);
+ RegCache::Unlock(ecx, Ra, Rb, Rs);
+ ComputeRC(a, false);
}
else
{
- MOVZX(64, 32, gpr.RX(a), gpr.R(a));
+ MOVZX(64, 32, Ra, Ra);
}
- gpr.UnlockAll();
- gpr.UnlockAllX();
}
}
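// slw/srw take a 6-bit shift amount; counts 32..63 must yield zero, but a
// 32-bit x86 shift masks its count to 5 bits. The SHL(64)/SHR(64) above
// sidestep that: the cached value is zero-extended to 64 bits, the 64-bit
// shift honours all 6 count bits, and the low half is the PPC result.
// Scalar demonstration for slw:

#include <cassert>
#include <cstdint>

uint32_t SlwReference(uint32_t x, uint32_t rb)
{
  const uint32_t amount = rb & 0x3f;  // PPC reads the low 6 bits of rB
  return (amount & 0x20) ? 0 : x << (amount & 0x1f);
}

int main()
{
  const uint32_t x = 0xdeadbeef;
  for (uint32_t amount : {0u, 1u, 31u, 32u, 33u, 63u})
  {
    // JIT: SHL(64, Ra, ecx) on the zero-extended value, keep the low half
    const uint64_t wide = (uint64_t)x << (amount & 0x3f);
    assert((uint32_t)wide == SlwReference(x, amount));
  }
  return 0;
}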
@@ -1757,29 +1805,32 @@ void Jit64::srawx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
- gpr.FlushLockX(ECX);
- gpr.Lock(a, s, b);
- gpr.BindToRegister(a, (a == s || a == b), true);
- MOV(32, R(ECX), gpr.R(b));
- if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
- SHL(64, gpr.R(a), Imm8(32));
- SAR(64, gpr.R(a), R(ECX));
- if (js.op->wantsCA)
{
- MOV(32, R(RSCRATCH), gpr.R(a));
- SHR(64, gpr.R(a), Imm8(32));
- TEST(32, gpr.R(a), R(RSCRATCH));
+ RCX64Reg ecx = gpr.Scratch(ECX); // no register choice
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(ecx, Ra, Rb, Rs);
+
+ MOV(32, ecx, Rb);
+ if (a != s)
+ MOV(32, Ra, Rs);
+ SHL(64, Ra, Imm8(32));
+ SAR(64, Ra, ecx);
+ if (js.op->wantsCA)
+ {
+ MOV(32, R(RSCRATCH), Ra);
+ SHR(64, Ra, Imm8(32));
+ TEST(32, Ra, R(RSCRATCH));
+ }
+ else
+ {
+ SHR(64, Ra, Imm8(32));
+ }
+ FinalizeCarry(CC_NZ);
}
- else
- {
- SHR(64, gpr.R(a), Imm8(32));
- }
- FinalizeCarry(CC_NZ);
if (inst.Rc)
- ComputeRC(gpr.R(a));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ ComputeRC(a);
}
void Jit64::srawix(UGeckoInstruction inst)
@@ -1792,49 +1843,51 @@ void Jit64::srawix(UGeckoInstruction inst)
if (amount != 0)
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s, true);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(Ra, Rs);
+
if (!js.op->wantsCA)
{
if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
- SAR(32, gpr.R(a), Imm8(amount));
+ MOV(32, Ra, Rs);
+ SAR(32, Ra, Imm8(amount));
}
else
{
- MOV(32, R(RSCRATCH), gpr.R(s));
+ MOV(32, R(RSCRATCH), Rs);
if (a != s)
- MOV(32, gpr.R(a), R(RSCRATCH));
+ MOV(32, Ra, R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops
if (amount == 1)
{
SHR(32, R(RSCRATCH), Imm8(31)); // sign
- AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
- SAR(32, gpr.R(a), Imm8(1));
+ AND(32, R(RSCRATCH), Ra); // (sign && carry)
+ SAR(32, Ra, Imm8(1));
MOV(8, PPCSTATE(xer_ca),
R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
}
else
{
- SAR(32, gpr.R(a), Imm8(amount));
+ SAR(32, Ra, Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount));
- TEST(32, R(RSCRATCH), gpr.R(a));
+ TEST(32, R(RSCRATCH), Ra);
FinalizeCarry(CC_NZ);
}
}
}
else
{
- gpr.Lock(a, s);
FinalizeCarry(false);
- gpr.BindToRegister(a, a == s, true);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(Ra, Rs);
if (a != s)
- MOV(32, gpr.R(a), gpr.R(s));
+ MOV(32, Ra, Rs);
}
if (inst.Rc)
- ComputeRC(gpr.R(a));
- gpr.UnlockAll();
+ ComputeRC(a);
}
// count leading zeroes
@@ -1846,39 +1899,40 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
int s = inst.RS;
bool needs_test = false;
- if (gpr.R(s).IsImm())
+ if (gpr.IsImm(s))
{
u32 mask = 0x80000000;
u32 i = 0;
for (; i < 32; i++, mask >>= 1)
{
- if (gpr.R(s).Imm32() & mask)
+ if (gpr.Imm32(s) & mask)
break;
}
gpr.SetImmediate32(a, i);
}
else
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s, true);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(Ra, Rs);
+
if (cpu_info.bLZCNT)
{
- LZCNT(32, gpr.RX(a), gpr.R(s));
+ LZCNT(32, Ra, Rs);
needs_test = true;
}
else
{
- BSR(32, gpr.RX(a), gpr.R(s));
+ BSR(32, Ra, Rs);
FixupBranch gotone = J_CC(CC_NZ);
- MOV(32, gpr.R(a), Imm32(63));
+ MOV(32, Ra, Imm32(63));
SetJumpTarget(gotone);
- XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
+ XOR(32, Ra, Imm8(0x1f)); // flip order
}
}
if (inst.Rc)
- ComputeRC(gpr.R(a), needs_test, false);
- gpr.UnlockAll();
+ ComputeRC(a, needs_test, false);
}
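// The non-LZCNT fallback above leans on two facts: BSR sets ZF and leaves
// its destination undefined for a zero input (hence the CC_NZ branch that
// loads 63), and for a bit index in 0..31, index XOR 0x1f equals
// 31 - index, while 63 XOR 0x1f is 32, the correct cntlzw result for zero.
// Scalar model of the emitted sequence:

#include <cassert>
#include <cstdint>

uint32_t CntlzwViaBsr(uint32_t x)
{
  uint32_t r;
  if (x == 0)
    r = 63;  // MOV(32, Ra, Imm32(63)) on the ZF path
  else
  {
    r = 31;  // BSR: index of the highest set bit
    while (!(x & 0x80000000u))
    {
      x <<= 1;
      --r;
    }
  }
  return r ^ 0x1f;  // XOR(32, Ra, Imm8(0x1f)): flip to a leading-zero count
}

int main()
{
  assert(CntlzwViaBsr(0) == 32);
  assert(CntlzwViaBsr(1) == 31);
  assert(CntlzwViaBsr(0x80000000u) == 0);
  assert(CntlzwViaBsr(0x00010000u) == 15);
  return 0;
}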
void Jit64::twX(UGeckoInstruction inst)
@@ -1890,13 +1944,17 @@ void Jit64::twX(UGeckoInstruction inst)
if (inst.OPCD == 3) // twi
{
- gpr.KillImmediate(a, true, false);
- CMP(32, gpr.R(a), Imm32((s32)(s16)inst.SIMM_16));
+ RCOpArg Ra = gpr.UseNoImm(a, RCMode::Read);
+ RegCache::Realize(Ra);
+ CMP(32, Ra, Imm32((s32)(s16)inst.SIMM_16));
}
else // tw
{
- gpr.BindToRegister(a, true, false);
- CMP(32, gpr.R(a), gpr.R(inst.RB));
+ s32 b = inst.RB;
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RegCache::Realize(Ra, Rb);
+ CMP(32, Ra, Rb);
}
constexpr std::array<CCFlags, 5> conditions{{CC_A, CC_B, CC_E, CC_G, CC_L}};
@@ -1912,17 +1970,22 @@ void Jit64::twX(UGeckoInstruction inst)
}
FixupBranch dont_trap = J();
- for (const FixupBranch& fixup : fixups)
{
- SetJumpTarget(fixup);
+ RCForkGuard gpr_guard = gpr.Fork();
+ RCForkGuard fpr_guard = fpr.Fork();
+
+ for (const FixupBranch& fixup : fixups)
+ {
+ SetJumpTarget(fixup);
+ }
+ LOCK();
+ OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
+
+ gpr.Flush();
+ fpr.Flush();
+
+ WriteExceptionExit();
}
- LOCK();
- OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
-
- gpr.Flush(RegCache::FlushMode::MaintainState);
- fpr.Flush(RegCache::FlushMode::MaintainState);
-
- WriteExceptionExit();
SetJumpTarget(dont_trap);
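// The Fork() guards above replace Flush(FlushMode::MaintainState): the
// rarely-taken trap path snapshots the cache bookkeeping, flushes freely,
// and the guard restores the snapshot when it goes out of scope, so code
// generation for the fall-through path resumes as if nothing happened.
// Toy sketch of that RAII shape (stand-in types, not the Dolphin
// implementation):

#include <vector>

struct ToyRegCache
{
  std::vector<int> state{1, 2, 3};  // pretend per-register bookkeeping

  struct ForkGuard
  {
    ToyRegCache* cache;
    std::vector<int> snapshot;
    ~ForkGuard() { cache->state = snapshot; }  // restore on scope exit
  };

  ForkGuard Fork() { return ForkGuard{this, state}; }
  void Flush() { state.assign(state.size(), 0); }  // clobbers bookkeeping
};

int main()
{
  ToyRegCache gpr;
  {
    ToyRegCache::ForkGuard guard = gpr.Fork();
    gpr.Flush();  // exception path: flush everything...
  }               // ...and the fork guard puts the old state back here
  return gpr.state[0] == 1 ? 0 : 1;
}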
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index bb58075107..0c6bb929a2 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -18,7 +18,7 @@
#include "Core/CoreTiming.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
@@ -126,12 +126,14 @@ void Jit64::lXXx(UGeckoInstruction inst)
js.op[2].inst.hex == 0x4182fff8)
{
s32 offset = (s32)(s16)inst.SIMM_16;
- gpr.BindToRegister(a, true, false);
- gpr.BindToRegister(d, false, true);
- SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, CallerSavedRegistersInUse(), signExtend);
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rd);
+
+ SafeLoadToReg(Rd, Ra, accessSize, offset, CallerSavedRegistersInUse(), signExtend);
// if it's still 0, we can wait until the next event
- TEST(32, gpr.R(d), gpr.R(d));
+ TEST(32, Rd, Rd);
FixupBranch noIdle = J_CC(CC_NZ);
BitSet32 registersInUse = CallerSavedRegistersInUse();
@@ -155,7 +157,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// Determine whether this instruction updates inst.RA
bool update;
if (inst.OPCD == 31)
- update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.R(b).IsImm() || gpr.R(b).Imm32() != 0);
+ update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.IsImm(b) || gpr.Imm32(b) != 0);
else
update = ((inst.OPCD & 1) != 0) && inst.SIMM_16 != 0;
@@ -165,19 +167,20 @@ void Jit64::lXXx(UGeckoInstruction inst)
bool storeAddress = false;
s32 loadOffset = 0;
+ // Prepare result
+ RCX64Reg Rd = jo.memcheck ? gpr.RevertableBind(d, RCMode::Write) : gpr.Bind(d, RCMode::Write);
+
// Prepare address operand
- OpArg opAddress;
+ RCOpArg opAddress;
if (!update && !a)
{
if (indexed)
{
- if (!gpr.R(b).IsImm())
- gpr.BindToRegister(b, true, false);
- opAddress = gpr.R(b);
+ opAddress = gpr.BindOrImm(b, RCMode::Read);
}
else
{
- opAddress = Imm32((u32)(s32)inst.SIMM_16);
+ opAddress = RCOpArg::Imm32((u32)(s32)inst.SIMM_16);
}
}
else if (update && ((a == 0) || (d == a)))
@@ -186,36 +189,40 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
else
{
- if (!indexed && gpr.R(a).IsImm() && !jo.memcheck)
+ if (!indexed && gpr.IsImm(a) && !jo.memcheck)
{
- u32 val = gpr.R(a).Imm32() + inst.SIMM_16;
- opAddress = Imm32(val);
+ u32 val = gpr.Imm32(a) + inst.SIMM_16;
+ opAddress = RCOpArg::Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
- else if (indexed && gpr.R(a).IsImm() && gpr.R(b).IsImm() && !jo.memcheck)
+ else if (indexed && gpr.IsImm(a) && gpr.IsImm(b) && !jo.memcheck)
{
- u32 val = gpr.R(a).Imm32() + gpr.R(b).Imm32();
- opAddress = Imm32(val);
+ u32 val = gpr.Imm32(a) + gpr.Imm32(b);
+ opAddress = RCOpArg::Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
else
{
// If we're using reg+reg mode and b is an immediate, pretend we're using constant offset mode
- bool use_constant_offset = !indexed || gpr.R(b).IsImm();
+ const bool use_constant_offset = !indexed || gpr.IsImm(b);
s32 offset = 0;
if (use_constant_offset)
- offset = indexed ? gpr.R(b).SImm32() : (s32)inst.SIMM_16;
+ offset = indexed ? gpr.SImm32(b) : (s32)inst.SIMM_16;
+
+ RCOpArg Rb = use_constant_offset ? RCOpArg{} : gpr.Use(b, RCMode::Read);
+
// Depending on whether we have an immediate and/or update, find the optimum way to calculate
// the load address.
if ((update || use_constant_offset) && !jo.memcheck)
{
- gpr.BindToRegister(a, true, update);
- opAddress = gpr.R(a);
+ opAddress = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
+ RegCache::Realize(opAddress, Rb);
+
if (!use_constant_offset)
- ADD(32, opAddress, gpr.R(b));
+ ADD(32, opAddress, Rb);
else if (update)
ADD(32, opAddress, Imm32((u32)offset));
else
@@ -223,51 +230,36 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
else
{
- // In this case we need an extra temporary register.
- opAddress = R(RSCRATCH2);
storeAddress = true;
+ // In this case we need an extra temporary register.
+ opAddress = RCOpArg::R(RSCRATCH2);
+ RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ RegCache::Realize(opAddress, Ra, Rb);
+
if (use_constant_offset)
- MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
+ MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)offset));
else
- MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
+ MOV_sum(32, RSCRATCH2, Ra, Rb);
}
}
}
- gpr.Lock(a, b, d);
-
- if (update && storeAddress)
- gpr.BindToRegister(a, true, true);
-
- // A bit of an evil hack here. We need to retain the original value of this register for the
- // exception path, but we'd rather not needlessly pass it around if we don't have to, since
- // the exception path is very rare. So we store the value in the regcache, let the load path
- // clobber it, then restore the value in the exception path.
- // TODO: no other load has to do this at the moment, since no other loads go directly to the
- // target registers, but if that ever changes, we need to do it there too.
- if (jo.memcheck)
- {
- gpr.StoreFromRegister(d);
- js.revertGprLoad = d;
- }
- gpr.BindToRegister(d, false, true);
+ RCX64Reg Ra = (update && storeAddress) ? gpr.Bind(a, RCMode::Write) : RCX64Reg{};
+ RegCache::Realize(opAddress, Ra, Rd);
BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update && storeAddress)
registersInUse[RSCRATCH2] = true;
- SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
+ SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress)
- MOV(32, gpr.R(a), opAddress);
+ MOV(32, Ra, opAddress);
// TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed)
- BSWAP(accessSize, gpr.RX(d));
-
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ BSWAP(accessSize, Rd);
}
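// RevertableBind() packages the idiom whose explanatory comment is deleted
// above: under memcheck, the destination's old value is kept safe before
// the load so the exception path can restore it if the access faults. Toy
// model of the spill/clobber/revert sequence (behaviour inferred from the
// old comment, not the real implementation):

#include <cstdio>

struct ToyReg
{
  int spilled = 7;   // copy kept in ppcState-like memory
  int host_reg = 7;  // cached copy in a host register
};

void FaultableLoad(ToyReg& d, int loaded, bool faults)
{
  d.spilled = d.host_reg;    // RevertableBind: store the old value first
  d.host_reg = loaded;       // the load clobbers the target register...
  if (faults)
    d.host_reg = d.spilled;  // ...and the exception path reverts it
}

int main()
{
  ToyReg d;
  FaultableLoad(d, 42, /*faults=*/true);
  std::printf("after faulting load: %d (old value preserved)\n", d.host_reg);
  return 0;
}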
void Jit64::dcbx(UGeckoInstruction inst)
@@ -277,10 +269,12 @@ void Jit64::dcbx(UGeckoInstruction inst)
X64Reg addr = RSCRATCH;
X64Reg value = RSCRATCH2;
- X64Reg tmp = gpr.GetFreeXReg();
- gpr.FlushLockX(tmp);
+ RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
+ RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
+ RCX64Reg tmp = gpr.Scratch();
+ RegCache::Realize(Ra, Rb, tmp);
- MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));
+ MOV_sum(32, addr, Ra, Rb);
// Check whether a JIT cache line needs to be invalidated.
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
@@ -305,8 +299,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
-
- gpr.UnlockAllX();
}
void Jit64::dcbt(UGeckoInstruction inst)
@@ -338,10 +330,14 @@ void Jit64::dcbz(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
- MOV(32, R(RSCRATCH), gpr.R(b));
- if (a)
- ADD(32, R(RSCRATCH), gpr.R(a));
- AND(32, R(RSCRATCH), Imm32(~31));
+ {
+ RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RegCache::Realize(Ra, Rb);
+
+ MOV_sum(32, RSCRATCH, Ra, Rb);
+ AND(32, R(RSCRATCH), Imm32(~31));
+ }
if (MSR.DR)
{
@@ -407,10 +403,14 @@ void Jit64::stX(UGeckoInstruction inst)
}
// If we already know the address of the write
- if (!a || gpr.R(a).IsImm())
+ if (!a || gpr.IsImm(a))
{
- u32 addr = (a ? gpr.R(a).Imm32() : 0) + offset;
- bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse());
+ const u32 addr = (a ? gpr.Imm32(a) : 0) + offset;
+ const bool exception = [&] {
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(Rs);
+ return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse());
+ }();
if (update)
{
if (!jo.memcheck || !exception)
@@ -419,42 +419,35 @@ void Jit64::stX(UGeckoInstruction inst)
}
else
{
- gpr.KillImmediate(a, true, true);
+ RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
+ RegCache::Realize(Ra);
MemoryExceptionCheck();
- ADD(32, gpr.R(a), Imm32((u32)offset));
+ ADD(32, Ra, Imm32((u32)offset));
}
}
}
else
{
- gpr.Lock(a, s);
- gpr.BindToRegister(a, true, update);
- if (gpr.R(s).IsImm())
+ RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
+ RCOpArg reg_value;
+ if (!gpr.IsImm(s) && WriteClobbersRegValue(accessSize, /* swap */ true))
{
- SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
- SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
+ RCOpArg Rs = gpr.Use(s, RCMode::Read);
+ RegCache::Realize(Rs);
+ reg_value = RCOpArg::R(RSCRATCH2);
+ MOV(32, reg_value, Rs);
}
else
{
- X64Reg reg_value;
- if (WriteClobbersRegValue(accessSize, /* swap */ true))
- {
- MOV(32, R(RSCRATCH2), gpr.R(s));
- reg_value = RSCRATCH2;
- }
- else
- {
- gpr.BindToRegister(s, true, false);
- reg_value = gpr.RX(s);
- }
- SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
- SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
+ reg_value = gpr.BindOrImm(s, RCMode::Read);
}
+ RegCache::Realize(Ra, reg_value);
+ SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(),
+ SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
if (update)
- ADD(32, gpr.R(a), Imm32((u32)offset));
+ ADD(32, Ra, Imm32((u32)offset));
}
- gpr.UnlockAll();
}
void Jit64::stXx(UGeckoInstruction inst)
@@ -467,13 +460,6 @@ void Jit64::stXx(UGeckoInstruction inst)
bool byte_reverse = !!(inst.SUBOP10 & 512);
FALLBACK_IF(!a || (update && a == s) || (update && jo.memcheck && a == b));
- gpr.Lock(a, b, s);
-
- if (update)
- gpr.BindToRegister(a, true, true);
-
- MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
-
int accessSize;
switch (inst.SUBOP10 & ~32)
{
@@ -494,39 +480,28 @@ void Jit64::stXx(UGeckoInstruction inst)
break;
}
- if (gpr.R(s).IsImm())
+ const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);
+
+ RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = does_clobber ? gpr.Use(s, RCMode::Read) : gpr.BindOrImm(s, RCMode::Read);
+ RegCache::Realize(Ra, Rb, Rs);
+
+ MOV_sum(32, RSCRATCH2, Ra, Rb);
+
+ if (!Rs.IsImm() && does_clobber)
{
- BitSet32 registersInUse = CallerSavedRegistersInUse();
- if (update)
- registersInUse[RSCRATCH2] = true;
- SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse,
- byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
- }
- else
- {
- X64Reg reg_value;
- if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse))
- {
- MOV(32, R(RSCRATCH), gpr.R(s));
- reg_value = RSCRATCH;
- }
- else
- {
- gpr.BindToRegister(s, true, false);
- reg_value = gpr.RX(s);
- }
- BitSet32 registersInUse = CallerSavedRegistersInUse();
- if (update)
- registersInUse[RSCRATCH2] = true;
- SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse,
- byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
+ MOV(32, R(RSCRATCH), Rs);
+ Rs = RCOpArg::R(RSCRATCH);
}
+ BitSet32 registersInUse = CallerSavedRegistersInUse();
+ if (update)
+ registersInUse[RSCRATCH2] = true;
+ SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse,
+ byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
if (update)
- MOV(32, gpr.R(a), R(RSCRATCH2));
-
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ MOV(32, Ra, R(RSCRATCH2));
}
// A few games use these heavily in video codecs.
@@ -535,18 +510,22 @@ void Jit64::lmw(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
+ int a = inst.RA, d = inst.RD;
+
// TODO: This doesn't handle rollback on DSI correctly
- MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16));
- if (inst.RA)
- ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
- for (int i = inst.RD; i < 32; i++)
{
- SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4,
- CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
- gpr.BindToRegister(i, false, true);
- MOV(32, gpr.R(i), R(RSCRATCH));
+ RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
+ RegCache::Realize(Ra);
+ MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)(s32)inst.SIMM_16));
+ }
+ for (int i = d; i < 32; i++)
+ {
+ SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4,
+ CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
+ RCOpArg Ri = gpr.Bind(i, RCMode::Write);
+ RegCache::Realize(Ri);
+ MOV(32, Ri, R(RSCRATCH));
}
- gpr.UnlockAllX();
}
void Jit64::stmw(UGeckoInstruction inst)
@@ -554,26 +533,27 @@ void Jit64::stmw(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
+ int a = inst.RA, d = inst.RD;
+
// TODO: This doesn't handle rollback on DSI correctly
- for (int i = inst.RD; i < 32; i++)
+ for (int i = d; i < 32; i++)
{
- if (inst.RA)
- MOV(32, R(RSCRATCH), gpr.R(inst.RA));
- else
+ RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
+ RCOpArg Ri = gpr.Use(i, RCMode::Read);
+ RegCache::Realize(Ra, Ri);
+
+ if (Ra.IsZero())
XOR(32, R(RSCRATCH), R(RSCRATCH));
- if (gpr.R(i).IsImm())
- {
- SafeWriteRegToReg(gpr.R(i), RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
- CallerSavedRegistersInUse());
- }
else
+ MOV(32, R(RSCRATCH), Ra);
+ if (!Ri.IsImm())
{
- MOV(32, R(RSCRATCH2), gpr.R(i));
- SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
- CallerSavedRegistersInUse());
+ MOV(32, R(RSCRATCH2), Ri);
+ Ri = RCOpArg::R(RSCRATCH2);
}
+ SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16,
+ CallerSavedRegistersInUse());
}
- gpr.UnlockAllX();
}
void Jit64::eieio(UGeckoInstruction inst)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index e89bf3f9d6..33a9f41a65 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -6,7 +6,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
using namespace Gen;
@@ -30,25 +30,27 @@ void Jit64::lfXXX(UGeckoInstruction inst)
FALLBACK_IF(!indexed && !a);
- gpr.BindToRegister(a, true, update);
-
s32 offset = 0;
- OpArg addr = gpr.R(a);
+ RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
+ RegCache::Realize(addr);
+
if (update && jo.memcheck)
{
- addr = R(RSCRATCH2);
- MOV(32, addr, gpr.R(a));
+ MOV(32, R(RSCRATCH2), addr);
+ addr = RCOpArg::R(RSCRATCH2);
}
if (indexed)
{
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RegCache::Realize(Rb);
if (update)
{
- ADD(32, addr, gpr.R(b));
+ ADD(32, addr, Rb);
}
else
{
- addr = R(RSCRATCH2);
- MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
+ MOV_sum(32, RSCRATCH2, a ? addr.Location() : Imm32(0), Rb);
+ addr = RCOpArg::R(RSCRATCH2);
}
}
else
@@ -59,13 +61,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s16)inst.SIMM_16;
}
- fpr.Lock(d);
- if (jo.memcheck && single)
- {
- fpr.StoreFromRegister(d);
- js.revertFprLoad = d;
- }
- fpr.BindToRegister(d, !single);
+ RCMode Rd_mode = !single ? RCMode::ReadWrite : RCMode::Write;
+ RCX64Reg Rd = jo.memcheck && single ? fpr.RevertableBind(d, Rd_mode) : fpr.Bind(d, Rd_mode);
+ RegCache::Realize(Rd);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && jo.memcheck)
registersInUse[RSCRATCH2] = true;
@@ -73,17 +71,19 @@ void Jit64::lfXXX(UGeckoInstruction inst)
if (single)
{
- ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
+ ConvertSingleToDouble(Rd, RSCRATCH, true);
}
else
{
MOVQ_xmm(XMM0, R(RSCRATCH));
- MOVSD(fpr.RX(d), R(XMM0));
+ MOVSD(Rd, R(XMM0));
}
if (update && jo.memcheck)
- MOV(32, gpr.R(a), addr);
- fpr.UnlockAll();
- gpr.UnlockAll();
+ {
+ RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
+ RegCache::Realize(Ra);
+ MOV(32, Ra, addr);
+ }
}
void Jit64::stfXXX(UGeckoInstruction inst)
@@ -107,26 +107,31 @@ void Jit64::stfXXX(UGeckoInstruction inst)
{
if (js.op->fprIsStoreSafe[s])
{
- CVTSD2SS(XMM0, fpr.R(s));
+ RCOpArg Rs = fpr.Use(s, RCMode::Read);
+ RegCache::Realize(Rs);
+ CVTSD2SS(XMM0, Rs);
}
else
{
- fpr.BindToRegister(s, true, false);
- ConvertDoubleToSingle(XMM0, fpr.RX(s));
+ RCX64Reg Rs = fpr.Bind(s, RCMode::Read);
+ RegCache::Realize(Rs);
+ ConvertDoubleToSingle(XMM0, Rs);
}
MOVD_xmm(R(RSCRATCH), XMM0);
}
else
{
- if (fpr.R(s).IsSimpleReg())
- MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
+ RCOpArg Rs = fpr.Use(s, RCMode::Read);
+ RegCache::Realize(Rs);
+ if (Rs.IsSimpleReg())
+ MOVQ_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
- MOV(64, R(RSCRATCH), fpr.R(s));
+ MOV(64, R(RSCRATCH), Rs);
}
- if (!indexed && (!a || gpr.R(a).IsImm()))
+ if (!indexed && (!a || gpr.IsImm(a)))
{
- u32 addr = (a ? gpr.R(a).Imm32() : 0) + imm;
+ u32 addr = (a ? gpr.Imm32(a) : 0) + imm;
bool exception =
WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());
@@ -138,33 +143,34 @@ void Jit64::stfXXX(UGeckoInstruction inst)
}
else
{
- gpr.KillImmediate(a, true, true);
+ RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
+ RegCache::Realize(Ra);
MemoryExceptionCheck();
- ADD(32, gpr.R(a), Imm32((u32)imm));
+ ADD(32, Ra, Imm32((u32)imm));
}
}
- fpr.UnlockAll();
- gpr.UnlockAll();
return;
}
s32 offset = 0;
- if (update)
- gpr.BindToRegister(a, true, true);
+ RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
+ RegCache::Realize(Ra);
if (indexed)
{
- MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RegCache::Realize(Rb);
+ MOV_sum(32, RSCRATCH2, a ? Ra.Location() : Imm32(0), Rb);
}
else
{
if (update)
{
- LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
+ MOV_sum(32, RSCRATCH2, Ra, Imm32(imm));
}
else
{
offset = imm;
- MOV(32, R(RSCRATCH2), gpr.R(a));
+ MOV(32, R(RSCRATCH2), Ra);
}
}
@@ -176,11 +182,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update)
- MOV(32, gpr.R(a), R(RSCRATCH2));
-
- fpr.UnlockAll();
- gpr.UnlockAll();
- gpr.UnlockAllX();
+ MOV(32, Ra, R(RSCRATCH2));
}
// This one is a little bit weird; it stores the low 32 bits of a double without converting it
@@ -193,12 +195,16 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
- MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
+ RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
+ RCOpArg Rb = gpr.Use(b, RCMode::Read);
+ RCOpArg Rs = fpr.Use(s, RCMode::Read);
+ RegCache::Realize(Ra, Rb, Rs);
- if (fpr.R(s).IsSimpleReg())
- MOVD_xmm(R(RSCRATCH), fpr.RX(s));
+ MOV_sum(32, RSCRATCH2, Ra, Rb);
+
+ if (Rs.IsSimpleReg())
+ MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
- MOV(32, R(RSCRATCH), fpr.R(s));
+ MOV(32, R(RSCRATCH), Rs);
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
- gpr.UnlockAllX();
}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index bf42fa3774..05bbfd6e6c 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -9,7 +9,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/PowerPC.h"
@@ -40,21 +40,22 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
- gpr.Lock(a, b);
- gpr.FlushLockX(RSCRATCH_EXTRA);
- if (update)
- gpr.BindToRegister(a, true, true);
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
+ RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
+ RCOpArg Rs = fpr.Use(s, RCMode::Read);
+ RegCache::Realize(scratch_guard, Ra, Rb, Rs);
- MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
+ MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
- MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
+ MOV(32, Ra, R(RSCRATCH_EXTRA));
if (w)
- CVTSD2SS(XMM0, fpr.R(s)); // one
+ CVTSD2SS(XMM0, Rs); // one
else
- CVTPD2PS(XMM0, fpr.R(s)); // pair
+ CVTPD2PS(XMM0, Rs); // pair
if (gqrIsConstant)
{
@@ -104,13 +105,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && jo.memcheck)
{
- if (indexed)
- ADD(32, gpr.R(a), gpr.R(b));
- else
- ADD(32, gpr.R(a), Imm32((u32)offset));
+ ADD(32, Ra, Rb);
}
- gpr.UnlockAll();
- gpr.UnlockAllX();
}
void Jit64::psq_lXX(UGeckoInstruction inst)
@@ -135,17 +131,17 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second >> 16 : 0;
- gpr.Lock(a, b);
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
+ RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
+ RCX64Reg Rs = fpr.Bind(s, RCMode::Write);
+ RegCache::Realize(scratch_guard, Ra, Rb, Rs);
- gpr.FlushLockX(RSCRATCH_EXTRA);
- gpr.BindToRegister(a, true, update);
- fpr.BindToRegister(s, false, true);
-
- MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
+ MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
- MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
+ MOV(32, Ra, R(RSCRATCH_EXTRA));
if (gqrIsConstant)
{
@@ -169,15 +165,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MatR(RSCRATCH));
}
- CVTPS2PD(fpr.RX(s), R(XMM0));
+ CVTPS2PD(Rs, R(XMM0));
if (update && jo.memcheck)
{
- if (indexed)
- ADD(32, gpr.R(a), gpr.R(b));
- else
- ADD(32, gpr.R(a), Imm32((u32)offset));
+ ADD(32, Ra, Rb);
}
-
- gpr.UnlockAll();
- gpr.UnlockAllX();
}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index 45ea288137..f75489be86 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -7,7 +7,7 @@
#include "Common/MsgHandler.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
using namespace Gen;
@@ -22,8 +22,10 @@ void Jit64::ps_mr(UGeckoInstruction inst)
if (d == b)
return;
- fpr.BindToRegister(d, false);
- MOVAPD(fpr.RX(d), fpr.R(b));
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rb, Rd);
+ MOVAPD(Rd, Rb);
}
void Jit64::ps_sum(UGeckoInstruction inst)
@@ -36,43 +38,46 @@ void Jit64::ps_sum(UGeckoInstruction inst)
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
- fpr.Lock(a, b, c, d);
- OpArg op_a = fpr.R(a);
- fpr.BindToRegister(d, d == b || d == c);
+
+ RCOpArg Ra = fpr.Use(a, RCMode::Read);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCOpArg Rc = fpr.Use(c, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rc, Rd);
+
X64Reg tmp = XMM1;
- MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
- ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
+ MOVDDUP(tmp, Ra); // {a.ps0, a.ps0}
+ ADDPD(tmp, Rb); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
switch (inst.SUBOP5)
{
case 10: // ps_sum0: {a.ps0 + b.ps1, c.ps1}
- UNPCKHPD(tmp, fpr.R(c));
+ UNPCKHPD(tmp, Rc);
break;
case 11: // ps_sum1: {c.ps0, a.ps0 + b.ps1}
- if (fpr.R(c).IsSimpleReg())
+ if (Rc.IsSimpleReg())
{
if (cpu_info.bSSE4_1)
{
- BLENDPD(tmp, fpr.R(c), 1);
+ BLENDPD(tmp, Rc, 1);
}
else
{
- MOVAPD(XMM0, fpr.R(c));
+ MOVAPD(XMM0, Rc);
SHUFPD(XMM0, R(tmp), 2);
tmp = XMM0;
}
}
else
{
- MOVLPD(tmp, fpr.R(c));
+ MOVLPD(tmp, Rc);
}
break;
default:
PanicAlert("ps_sum WTF!!!");
}
- HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
- ForceSinglePrecision(fpr.RX(d), fpr.R(d));
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
+ HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1);
+ ForceSinglePrecision(Rd, Rd);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::ps_muls(UGeckoInstruction inst)
@@ -85,26 +90,29 @@ void Jit64::ps_muls(UGeckoInstruction inst)
int a = inst.FA;
int c = inst.FC;
bool round_input = !js.op->fprIsSingle[c];
- fpr.Lock(a, c, d);
+
+ RCOpArg Ra = fpr.Use(a, RCMode::Read);
+ RCOpArg Rc = fpr.Use(c, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rc, Rd);
+
switch (inst.SUBOP5)
{
case 12: // ps_muls0
- MOVDDUP(XMM1, fpr.R(c));
+ MOVDDUP(XMM1, Rc);
break;
case 13: // ps_muls1
- avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
+ avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
break;
default:
PanicAlert("ps_muls WTF!!!");
}
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
- MULPD(XMM1, fpr.R(a));
- fpr.BindToRegister(d, false);
- HandleNaNs(inst, fpr.RX(d), XMM1);
- ForceSinglePrecision(fpr.RX(d), fpr.R(d));
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
+ MULPD(XMM1, Ra);
+ HandleNaNs(inst, Rd, XMM1);
+ ForceSinglePrecision(Rd, Rd);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::ps_mergeXX(UGeckoInstruction inst)
@@ -116,27 +124,29 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
- fpr.Lock(a, b, d);
- fpr.BindToRegister(d, d == a || d == b);
+
+ RCOpArg Ra = fpr.Use(a, RCMode::Read);
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Ra, Rb, Rd);
switch (inst.SUBOP10)
{
case 528:
- avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, fpr.RX(d), fpr.R(a), fpr.R(b));
+ avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, Rd, Ra, Rb);
break; // 00
case 560:
- avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 2);
+ avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 2);
break; // 01
case 592:
- avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 1);
+ avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 1);
break; // 10
case 624:
- avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, fpr.RX(d), fpr.R(a), fpr.R(b));
+ avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, Rd, Ra, Rb);
break; // 11
default:
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
}
- fpr.UnlockAll();
}
void Jit64::ps_rsqrte(UGeckoInstruction inst)
@@ -147,23 +157,21 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
- gpr.FlushLockX(RSCRATCH_EXTRA);
- fpr.Lock(b, d);
- fpr.BindToRegister(b, true, false);
- fpr.BindToRegister(d, false);
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(scratch_guard, Rb, Rd);
- MOVSD(XMM0, fpr.R(b));
+ MOVSD(XMM0, Rb);
CALL(asm_routines.frsqrte);
- MOVSD(fpr.R(d), XMM0);
+ MOVSD(Rd, XMM0);
- MOVHLPS(XMM0, fpr.RX(b));
+ MOVHLPS(XMM0, Rb);
CALL(asm_routines.frsqrte);
- MOVLHPS(fpr.RX(d), XMM0);
+ MOVLHPS(Rd, XMM0);
- ForceSinglePrecision(fpr.RX(d), fpr.R(d));
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
- gpr.UnlockAllX();
+ ForceSinglePrecision(Rd, Rd);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::ps_res(UGeckoInstruction inst)
@@ -174,23 +182,21 @@ void Jit64::ps_res(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
- gpr.FlushLockX(RSCRATCH_EXTRA);
- fpr.Lock(b, d);
- fpr.BindToRegister(b, true, false);
- fpr.BindToRegister(d, false);
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
+ RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(scratch_guard, Rb, Rd);
- MOVSD(XMM0, fpr.R(b));
+ MOVSD(XMM0, Rb);
CALL(asm_routines.fres);
- MOVSD(fpr.R(d), XMM0);
+ MOVSD(Rd, XMM0);
- MOVHLPS(XMM0, fpr.RX(b));
+ MOVHLPS(XMM0, Rb);
CALL(asm_routines.fres);
- MOVLHPS(fpr.RX(d), XMM0);
+ MOVLHPS(Rd, XMM0);
- ForceSinglePrecision(fpr.RX(d), fpr.R(d));
- SetFPRFIfNeeded(fpr.RX(d));
- fpr.UnlockAll();
- gpr.UnlockAllX();
+ ForceSinglePrecision(Rd, Rd);
+ SetFPRFIfNeeded(Rd);
}
void Jit64::ps_cmpXX(UGeckoInstruction inst)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index 37631239fe..2ff1c37986 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -9,7 +9,7 @@
#include "Core/CoreTiming.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/Jit64/Jit.h"
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"
@@ -219,26 +219,32 @@ void Jit64::mtspr(UGeckoInstruction inst)
break;
case SPR_XER:
- gpr.Lock(d);
- gpr.BindToRegister(d, true, false);
- MOV(32, R(RSCRATCH), gpr.R(d));
+ {
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Read);
+ RegCache::Realize(Rd);
+
+ MOV(32, R(RSCRATCH), Rd);
AND(32, R(RSCRATCH), Imm32(0xff7f));
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
- MOV(32, R(RSCRATCH), gpr.R(d));
+ MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(8, R(RSCRATCH), Imm8(1));
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
- MOV(32, R(RSCRATCH), gpr.R(d));
+ MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
- gpr.UnlockAll();
+
return;
+ }
case SPR_HID0:
{
- MOV(32, R(RSCRATCH), gpr.R(d));
+ RCOpArg Rd = gpr.Use(d, RCMode::Read);
+ RegCache::Realize(Rd);
+
+ MOV(32, R(RSCRATCH), Rd);
BTR(32, R(RSCRATCH), Imm8(31 - 20)); // ICFI
MOV(32, PPCSTATE(spr[iIndex]), R(RSCRATCH));
FixupBranch dont_reset_icache = J_CC(CC_NC);
@@ -255,13 +261,9 @@ void Jit64::mtspr(UGeckoInstruction inst)
}
// OK, this is easy.
- if (!gpr.R(d).IsImm())
- {
- gpr.Lock(d);
- gpr.BindToRegister(d, true, false);
- }
- MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d));
- gpr.UnlockAll();
+ RCOpArg Rd = gpr.BindOrImm(d, RCMode::Read);
+ RegCache::Realize(Rd);
+ MOV(32, PPCSTATE(spr[iIndex]), Rd);
}
void Jit64::mfspr(UGeckoInstruction inst)
@@ -281,22 +283,23 @@ void Jit64::mfspr(UGeckoInstruction inst)
// redundant for the JIT.
// no register choice
- gpr.FlushLockX(RDX, RAX);
- gpr.FlushLockX(RCX);
+ RCX64Reg rdx = gpr.Scratch(RDX);
+ RCX64Reg rax = gpr.Scratch(RAX);
+ RCX64Reg rcx = gpr.Scratch(RCX);
- MOV(64, R(RCX), ImmPtr(&CoreTiming::g));
+ MOV(64, rcx, ImmPtr(&CoreTiming::g));
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
// Scale downcount by the CPU overclocking factor.
CVTSI2SS(XMM0, PPCSTATE(downcount));
- MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
- CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
- MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length)));
- SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength -
- // Scaled_downcount)
- ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer)));
- SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
+ MULSS(XMM0, MDisp(rcx, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
+ CVTSS2SI(rdx, R(XMM0)); // RDX is downcount scaled by the overclocking factor
+ MOV(32, rax, MDisp(rcx, offsetof(CoreTiming::Globals, slice_length)));
+ SUB(64, rax, rdx); // cycles since the last CoreTiming::Advance() event is (slicelength -
+ // Scaled_downcount)
+ ADD(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, global_timer)));
+ SUB(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
// It might seem convenient to correct the timer for the block position here for even more
// accurate
// timing, but as of currently, this can break games. If we end up reading a time *after* the
@@ -307,15 +310,15 @@ void Jit64::mfspr(UGeckoInstruction inst)
// Revolution,
// which won't get past the loading screen.
// if (js.downcountAmount)
- // ADD(64, R(RAX), Imm32(js.downcountAmount));
+ // ADD(64, rax, Imm32(js.downcountAmount));
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
- MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
- MUL(64, R(RDX));
- MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value)));
- SHR(64, R(RDX), Imm8(3));
- ADD(64, R(RAX), R(RDX));
- MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
+ MOV(64, rdx, Imm64(0xAAAAAAAAAAAAAAABULL));
+ MUL(64, rdx);
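+ // (0xAAAAAAAAAAAAAAAB == ceil(2^67 / 12), so (a * 0xAAAAAAAAAAAAAAAB) >> 67
+ // == a / 12 exactly for any 64-bit unsigned a. MUL leaves the high half of
+ // the 128-bit product in RDX, which already accounts for a shift right by
+ // 64; the SHR by 3 below completes the >> 67.)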
+ MOV(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_value)));
+ SHR(64, rdx, Imm8(3));
+ ADD(64, rax, rdx);
+ MOV(64, PPCSTATE(spr[SPR_TL]), rax);
if (CanMergeNextInstructions(1))
{
@@ -330,40 +333,42 @@ void Jit64::mfspr(UGeckoInstruction inst)
{
js.downcountAmount++;
js.skipInstructions = 1;
- gpr.Lock(d, n);
- gpr.BindToRegister(d, false);
- gpr.BindToRegister(n, false);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RCX64Reg Rn = gpr.Bind(n, RCMode::Write);
+ RegCache::Realize(Rd, Rn);
if (iIndex == SPR_TL)
- MOV(32, gpr.R(d), R(RAX));
+ MOV(32, Rd, rax);
if (nextIndex == SPR_TL)
- MOV(32, gpr.R(n), R(RAX));
- SHR(64, R(RAX), Imm8(32));
+ MOV(32, Rn, rax);
+ SHR(64, rax, Imm8(32));
if (iIndex == SPR_TU)
- MOV(32, gpr.R(d), R(RAX));
+ MOV(32, Rd, rax);
if (nextIndex == SPR_TU)
- MOV(32, gpr.R(n), R(RAX));
+ MOV(32, Rn, rax);
break;
}
}
- gpr.Lock(d);
- gpr.BindToRegister(d, false);
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
if (iIndex == SPR_TU)
- SHR(64, R(RAX), Imm8(32));
- MOV(32, gpr.R(d), R(RAX));
+ SHR(64, rax, Imm8(32));
+ MOV(32, Rd, rax);
break;
}
case SPR_XER:
- gpr.Lock(d);
- gpr.BindToRegister(d, false);
- MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
+ {
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
+ MOVZX(32, 16, Rd, PPCSTATE(xer_stringctrl));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
- OR(32, gpr.R(d), R(RSCRATCH));
+ OR(32, Rd, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
- OR(32, gpr.R(d), R(RSCRATCH));
+ OR(32, Rd, R(RSCRATCH));
break;
+ }
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
@@ -372,26 +377,25 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_PMC4:
FALLBACK_IF(true);
default:
- gpr.Lock(d);
- gpr.BindToRegister(d, false);
- MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
+ {
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
+ MOV(32, Rd, PPCSTATE(spr[iIndex]));
break;
}
- gpr.UnlockAllX();
- gpr.UnlockAll();
+ }
}
void Jit64::mtmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
- if (!gpr.R(inst.RS).IsImm())
+
{
- gpr.Lock(inst.RS);
- gpr.BindToRegister(inst.RS, true, false);
+ RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
+ RegCache::Realize(Rs);
+ MOV(32, PPCSTATE(msr), Rs);
}
- MOV(32, PPCSTATE(msr), gpr.R(inst.RS));
- gpr.UnlockAll();
gpr.Flush();
fpr.Flush();
@@ -430,10 +434,9 @@ void Jit64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Privileged?
- gpr.Lock(inst.RD);
- gpr.BindToRegister(inst.RD, false, true);
- MOV(32, gpr.R(inst.RD), PPCSTATE(msr));
- gpr.UnlockAll();
+ RCX64Reg Rd = gpr.Bind(inst.RD, RCMode::Write);
+ RegCache::Realize(Rd);
+ MOV(32, Rd, PPCSTATE(msr));
}
void Jit64::mftb(UGeckoInstruction inst)
@@ -448,13 +451,13 @@ void Jit64::mfcr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
int d = inst.RD;
- gpr.FlushLockX(RSCRATCH_EXTRA);
+
+ RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
CALL(asm_routines.mfcr);
- gpr.Lock(d);
- gpr.BindToRegister(d, false, true);
- MOV(32, gpr.R(d), R(RSCRATCH));
- gpr.UnlockAll();
- gpr.UnlockAllX();
+
+ RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
+ MOV(32, Rd, R(RSCRATCH));
}
void Jit64::mtcrf(UGeckoInstruction inst)
@@ -466,13 +469,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
u32 crm = inst.CRM;
if (crm != 0)
{
- if (gpr.R(inst.RS).IsImm())
+ if (gpr.IsImm(inst.RS))
{
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
- u8 newcr = (gpr.R(inst.RS).Imm32() >> (28 - (i * 4))) & 0xF;
+ u8 newcr = (gpr.Imm32(inst.RS) >> (28 - (i * 4))) & 0xF;
u64 newcrval = PowerPC::PPCCRToInternal(newcr);
if ((s64)newcrval == (s32)newcrval)
{
@@ -489,13 +492,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
else
{
MOV(64, R(RSCRATCH2), ImmPtr(PowerPC::m_crTable.data()));
- gpr.Lock(inst.RS);
- gpr.BindToRegister(inst.RS, true, false);
+ RCX64Reg Rs = gpr.Bind(inst.RS, RCMode::Read);
+ RegCache::Realize(Rs);
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
- MOV(32, R(RSCRATCH), gpr.R(inst.RS));
+ MOV(32, R(RSCRATCH), Rs);
if (i != 7)
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
if (i != 0)
@@ -504,7 +507,6 @@ void Jit64::mtcrf(UGeckoInstruction inst)
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
}
}
- gpr.UnlockAll();
}
}
}
@@ -653,11 +655,12 @@ void Jit64::mffsx(UGeckoInstruction inst)
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
int d = inst.FD;
- fpr.BindToRegister(d, false, true);
+ RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
+ RegCache::Realize(Rd);
MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000));
OR(64, R(RSCRATCH), R(RSCRATCH2));
MOVQ_xmm(XMM0, R(RSCRATCH));
- MOVSD(fpr.RX(d), R(XMM0));
+ MOVSD(Rd, R(XMM0));
}
// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
@@ -751,10 +754,14 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
}
int b = inst.FB;
- if (fpr.R(b).IsSimpleReg())
- MOVQ_xmm(R(RSCRATCH), fpr.RX(b));
+
+ RCOpArg Rb = fpr.Use(b, RCMode::Read);
+ RegCache::Realize(Rb);
+
+ if (Rb.IsSimpleReg())
+ MOVQ_xmm(R(RSCRATCH), Rb.GetSimpleReg());
else
- MOV(32, R(RSCRATCH), fpr.R(b));
+ MOV(32, R(RSCRATCH), Rb);
MOV(32, R(RSCRATCH2), PPCSTATE(fpscr));
AND(32, R(RSCRATCH), Imm32(mask));
diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h
new file mode 100644
index 0000000000..3c7d5b4912
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h
@@ -0,0 +1,284 @@
+// Copyright 2008 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include "Common/Assert.h"
+#include "Common/CommonTypes.h"
+#include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
+
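+// Index of a guest PowerPC register (GPR or FPR slot) within the cache.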
+using preg_t = size_t;
+
+class PPCCachedReg
+{
+public:
+ enum class LocationType
+ {
+ /// Value is currently at its default location
+ Default,
+ /// Value is currently bound to a x64 register
+ Bound,
+ /// Value is known as an immediate and has not been written back to its default location
+ Immediate,
+ /// Value is known as an immediate and is already present at its default location
+ SpeculativeImmediate,
+ };
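+
+ // Lifecycle, as implied by the methods below: values start at Default, move
+ // to Bound via SetBoundTo(), and return to Default via SetFlushed().
+ // SetToImm32() yields Immediate when dirty, or SpeculativeImmediate when the
+ // default location already holds the same value.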
+
+ PPCCachedReg() = default;
+
+ explicit PPCCachedReg(Gen::OpArg default_location_)
+ : default_location(default_location_), location(default_location_)
+ {
+ }
+
+ const Gen::OpArg& Location() const { return location; }
+
+ LocationType GetLocationType() const
+ {
+ if (!away)
+ {
+ ASSERT(!revertable);
+
+ if (location.IsImm())
+ return LocationType::SpeculativeImmediate;
+
+ ASSERT(location == default_location);
+ return LocationType::Default;
+ }
+
+ ASSERT(location.IsImm() || location.IsSimpleReg());
+ return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
+ }
+
+ bool IsAway() const { return away; }
+ bool IsBound() const { return GetLocationType() == LocationType::Bound; }
+
+ void SetBoundTo(Gen::X64Reg xreg)
+ {
+ away = true;
+ location = Gen::R(xreg);
+ }
+
+ void SetFlushed()
+ {
+ ASSERT(!revertable);
+ away = false;
+ location = default_location;
+ }
+
+ void SetToImm32(u32 imm32, bool dirty = true)
+ {
+ away |= dirty;
+ location = Gen::Imm32(imm32);
+ }
+
+ bool IsRevertable() const { return revertable; }
+ void SetRevertable()
+ {
+ ASSERT(IsBound());
+ revertable = true;
+ }
+ void SetRevert()
+ {
+ ASSERT(revertable);
+ revertable = false;
+ SetFlushed();
+ }
+ void SetCommit()
+ {
+ ASSERT(revertable);
+ revertable = false;
+ }
+
+ bool IsLocked() const { return locked > 0; }
+ void Lock() { locked++; }
+ void Unlock()
+ {
+ ASSERT(IsLocked());
+ locked--;
+ }
+
+private:
+ Gen::OpArg default_location{};
+ Gen::OpArg location{};
+ bool away = false; // value not in source register
+ bool revertable = false;
+ size_t locked = 0;
+};
+
+class X64CachedReg
+{
+public:
+ preg_t Contents() const { return ppcReg; }
+
+ void SetBoundTo(preg_t ppcReg_, bool dirty_)
+ {
+ free = false;
+ ppcReg = ppcReg_;
+ dirty = dirty_;
+ }
+
+ void SetFlushed()
+ {
+ ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
+ free = true;
+ dirty = false;
+ }
+
+ bool IsFree() const { return free && !locked; }
+
+ bool IsDirty() const { return dirty; }
+ void MakeDirty() { dirty = true; }
+
+ bool IsLocked() const { return locked > 0; }
+ void Lock() { locked++; }
+ void Unlock()
+ {
+ ASSERT(IsLocked());
+ locked--;
+ }
+
+private:
+ preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
+ bool free = true;
+ bool dirty = false;
+ size_t locked = 0;
+};
+
+class RCConstraint
+{
+public:
+ bool IsRealized() const { return realized != RealizedLoc::Invalid; }
+ bool IsActive() const
+ {
+ return IsRealized() || write || read || kill_imm || kill_mem || revertable;
+ }
+
+ bool ShouldLoad() const { return read; }
+ bool ShouldDirty() const { return write; }
+ bool ShouldBeRevertable() const { return revertable; }
+ bool ShouldKillImmediate() const { return kill_imm; }
+ bool ShouldKillMemory() const { return kill_mem; }
+
+ enum class RealizedLoc
+ {
+ Invalid,
+ Bound,
+ Imm,
+ Mem,
+ };
+
+ void Realized(RealizedLoc loc)
+ {
+ realized = loc;
+ ASSERT(IsRealized());
+ }
+
+ enum class ConstraintLoc
+ {
+ Bound,
+ BoundOrImm,
+ BoundOrMem,
+ Any,
+ };
+
+ void AddUse(RCMode mode) { AddConstraint(mode, ConstraintLoc::Any, false); }
+ void AddUseNoImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrMem, false); }
+ void AddBindOrImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrImm, false); }
+ void AddBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, false); }
+ void AddRevertableBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, true); }
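+
+ // Constraints from several handles to the same register merge before a single
+ // realization. For example (hypothetical), Use(Read) followed by Bind(Write)
+ // on one preg yields {read, write, kill_imm, kill_mem}: the value is loaded,
+ // bound to a host register, and marked dirty exactly once.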
+
+private:
+ void AddConstraint(RCMode mode, ConstraintLoc loc, bool should_revertable)
+ {
+ if (IsRealized())
+ {
+ ASSERT(IsCompatible(mode, loc, should_revertable));
+ return;
+ }
+
+ if (should_revertable)
+ revertable = true;
+
+ switch (loc)
+ {
+ case ConstraintLoc::Bound:
+ kill_imm = true;
+ kill_mem = true;
+ break;
+ case ConstraintLoc::BoundOrImm:
+ kill_mem = true;
+ break;
+ case ConstraintLoc::BoundOrMem:
+ kill_imm = true;
+ break;
+ case ConstraintLoc::Any:
+ break;
+ }
+
+ switch (mode)
+ {
+ case RCMode::Read:
+ read = true;
+ break;
+ case RCMode::Write:
+ write = true;
+ break;
+ case RCMode::ReadWrite:
+ read = true;
+ write = true;
+ break;
+ }
+ }
+
+ bool IsCompatible(RCMode mode, ConstraintLoc loc, bool should_revertable) const
+ {
+ if (should_revertable && !revertable)
+ {
+ return false;
+ }
+
+ const bool is_loc_compatible = [&] {
+ switch (loc)
+ {
+ case ConstraintLoc::Bound:
+ return realized == RealizedLoc::Bound;
+ case ConstraintLoc::BoundOrImm:
+ return realized == RealizedLoc::Bound || realized == RealizedLoc::Imm;
+ case ConstraintLoc::BoundOrMem:
+ return realized == RealizedLoc::Bound || realized == RealizedLoc::Mem;
+ case ConstraintLoc::Any:
+ return true;
+ }
+ ASSERT(false);
+ return false;
+ }();
+
+ const bool is_mode_compatible = [&] {
+ switch (mode)
+ {
+ case RCMode::Read:
+ return read;
+ case RCMode::Write:
+ return write;
+ case RCMode::ReadWrite:
+ return read && write;
+ }
+ ASSERT(false);
+ return false;
+ }();
+
+ return is_loc_compatible && is_mode_compatible;
+ }
+
+ RealizedLoc realized = RealizedLoc::Invalid;
+ bool write = false;
+ bool read = false;
+ bool kill_imm = false;
+ bool kill_mem = false;
+ bool revertable = false;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
similarity index 96%
rename from Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
rename to Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
index 9077f388c8..f671afb74c 100644
--- a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
@@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
-#include "Core/PowerPC/Jit64/FPURegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h
similarity index 92%
rename from Source/Core/Core/PowerPC/Jit64/FPURegCache.h
rename to Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h
index 1ecbde2bea..fd7d2bb53c 100644
--- a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h
@@ -4,7 +4,7 @@
#pragma once
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
class Jit64;
@@ -12,9 +12,9 @@ class FPURegCache final : public RegCache
{
public:
explicit FPURegCache(Jit64& jit);
- Gen::OpArg GetDefaultLocation(preg_t preg) const override;
protected:
+ Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override;
void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp
similarity index 97%
rename from Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
rename to Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp
index 18b0e70602..1671d37a51 100644
--- a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp
@@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
-#include "Core/PowerPC/Jit64/GPRRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h
similarity index 92%
rename from Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
rename to Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h
index 0b383cf94f..a80182ad92 100644
--- a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h
@@ -4,7 +4,7 @@
#pragma once
-#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
class Jit64;
@@ -12,10 +12,10 @@ class GPRRegCache final : public RegCache
{
public:
explicit GPRRegCache(Jit64& jit);
- Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true);
protected:
+ Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override;
void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;
diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp
new file mode 100644
index 0000000000..9b7fc14cd9
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp
@@ -0,0 +1,729 @@
+// Copyright 2008 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <limits>
+#include <utility>
+#include <variant>
+
+#include "Common/Assert.h"
+#include "Common/BitSet.h"
+#include "Common/CommonTypes.h"
+#include "Common/MsgHandler.h"
+#include "Common/VariantUtil.h"
+#include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/Jit.h"
+#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
+#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
+#include "Core/PowerPC/PowerPC.h"
+
+using namespace Gen;
+using namespace PowerPC;
+
+RCOpArg RCOpArg::Imm32(u32 imm)
+{
+ return RCOpArg{imm};
+}
+
+RCOpArg RCOpArg::R(X64Reg xr)
+{
+ return RCOpArg{xr};
+}
+
+RCOpArg::RCOpArg() = default;
+
+RCOpArg::RCOpArg(u32 imm) : rc(nullptr), contents(imm)
+{
+}
+
+RCOpArg::RCOpArg(X64Reg xr) : rc(nullptr), contents(xr)
+{
+}
+
+RCOpArg::RCOpArg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
+{
+ rc->Lock(preg);
+}
+
+RCOpArg::~RCOpArg()
+{
+ Unlock();
+}
+
+RCOpArg::RCOpArg(RCOpArg&& other) noexcept
+ : rc(std::exchange(other.rc, nullptr)),
+ contents(std::exchange(other.contents, std::monostate{}))
+{
+}
+
+RCOpArg& RCOpArg::operator=(RCOpArg&& other) noexcept
+{
+ Unlock();
+ rc = std::exchange(other.rc, nullptr);
+ contents = std::exchange(other.contents, std::monostate{});
+ return *this;
+}
+
+RCOpArg::RCOpArg(RCX64Reg&& other) noexcept
+ : rc(std::exchange(other.rc, nullptr)),
+ contents(VariantCast(std::exchange(other.contents, std::monostate{})))
+{
+}
+
+RCOpArg& RCOpArg::operator=(RCX64Reg&& other) noexcept
+{
+ Unlock();
+ rc = std::exchange(other.rc, nullptr);
+ contents = VariantCast(std::exchange(other.contents, std::monostate{}));
+ return *this;
+}
+
+void RCOpArg::Realize()
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ rc->Realize(*preg);
+ }
+}
+
+OpArg RCOpArg::Location() const
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ ASSERT(rc->IsRealized(*preg));
+ return rc->R(*preg);
+ }
+ else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
+ {
+ return Gen::R(*xr);
+ }
+ else if (const u32* imm = std::get_if<u32>(&contents))
+ {
+ return Gen::Imm32(*imm);
+ }
+ ASSERT(false);
+ return {};
+}
+
+OpArg RCOpArg::ExtractWithByteOffset(int offset)
+{
+ if (offset == 0)
+ return Location();
+
+ ASSERT(rc);
+ const preg_t preg = std::get<preg_t>(contents);
+ rc->StoreFromRegister(preg, RegCache::FlushMode::MaintainState);
+ OpArg result = rc->GetDefaultLocation(preg);
+ result.AddMemOffset(offset);
+ return result;
+}
+
+void RCOpArg::Unlock()
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ ASSERT(rc);
+ rc->Unlock(*preg);
+ }
+ else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
+ {
+ // If rc, we got this from an RCX64Reg.
+ // If !rc, we got this from RCOpArg::R.
+ if (rc)
+ rc->UnlockX(*xr);
+ }
+ else
+ {
+ ASSERT(!rc);
+ }
+
+ rc = nullptr;
+ contents = std::monostate{};
+}
+
+bool RCOpArg::IsImm() const
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ return rc->R(*preg).IsImm();
+ }
+ else if (std::holds_alternative<u32>(contents))
+ {
+ return true;
+ }
+ return false;
+}
+
+s32 RCOpArg::SImm32() const
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ return rc->R(*preg).SImm32();
+ }
+ else if (const u32* imm = std::get_if<u32>(&contents))
+ {
+ return static_cast<s32>(*imm);
+ }
+ ASSERT(false);
+ return 0;
+}
+
+u32 RCOpArg::Imm32() const
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ return rc->R(*preg).Imm32();
+ }
+ else if (const u32* imm = std::get_if<u32>(&contents))
+ {
+ return *imm;
+ }
+ ASSERT(false);
+ return 0;
+}
+
+RCX64Reg::RCX64Reg() = default;
+
+RCX64Reg::RCX64Reg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
+{
+ rc->Lock(preg);
+}
+
+RCX64Reg::RCX64Reg(RegCache* rc_, X64Reg xr) : rc(rc_), contents(xr)
+{
+ rc->LockX(xr);
+}
+
+RCX64Reg::~RCX64Reg()
+{
+ Unlock();
+}
+
+RCX64Reg::RCX64Reg(RCX64Reg&& other) noexcept
+ : rc(std::exchange(other.rc, nullptr)),
+ contents(std::exchange(other.contents, std::monostate{}))
+{
+}
+
+RCX64Reg& RCX64Reg::operator=(RCX64Reg&& other) noexcept
+{
+ Unlock();
+ rc = std::exchange(other.rc, nullptr);
+ contents = std::exchange(other.contents, std::monostate{});
+ return *this;
+}
+
+void RCX64Reg::Realize()
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ rc->Realize(*preg);
+ }
+}
+
+RCX64Reg::operator X64Reg() const &
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ ASSERT(rc->IsRealized(*preg));
+ return rc->RX(*preg);
+ }
+ else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
+ {
+ return *xr;
+ }
+ ASSERT(false);
+ return {};
+}
+
+RCX64Reg::operator OpArg() const &
+{
+ return Gen::R(RCX64Reg::operator X64Reg());
+}
+
+void RCX64Reg::Unlock()
+{
+ if (const preg_t* preg = std::get_if<preg_t>(&contents))
+ {
+ ASSERT(rc);
+ rc->Unlock(*preg);
+ }
+ else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
+ {
+ ASSERT(rc);
+ rc->UnlockX(*xr);
+ }
+ else
+ {
+ ASSERT(!rc);
+ }
+
+ rc = nullptr;
+ contents = std::monostate{};
+}
+
+RCForkGuard::RCForkGuard(RegCache& rc_) : rc(&rc_), m_regs(rc_.m_regs), m_xregs(rc_.m_xregs)
+{
+ ASSERT(!rc->IsAnyConstraintActive());
+}
+
+RCForkGuard::RCForkGuard(RCForkGuard&& other) noexcept
+ : rc(other.rc), m_regs(std::move(other.m_regs)), m_xregs(std::move(other.m_xregs))
+{
+ other.rc = nullptr;
+}
+
+void RCForkGuard::EndFork()
+{
+ if (!rc)
+ return;
+
+ ASSERT(!rc->IsAnyConstraintActive());
+ rc->m_regs = m_regs;
+ rc->m_xregs = m_xregs;
+ rc = nullptr;
+}
+
+RegCache::RegCache(Jit64& jit) : m_jit{jit}
+{
+}
+
+void RegCache::Start()
+{
+ m_xregs.fill({});
+ for (size_t i = 0; i < m_regs.size(); i++)
+ {
+ m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
+ }
+}
+
+void RegCache::SetEmitter(XEmitter* emitter)
+{
+ m_emitter = emitter;
+}
+
+bool RegCache::SanityCheck() const
+{
+ for (size_t i = 0; i < m_regs.size(); i++)
+ {
+ switch (m_regs[i].GetLocationType())
+ {
+ case PPCCachedReg::LocationType::Default:
+ case PPCCachedReg::LocationType::SpeculativeImmediate:
+ case PPCCachedReg::LocationType::Immediate:
+ break;
+ case PPCCachedReg::LocationType::Bound:
+ {
+ if (m_regs[i].IsLocked() || m_regs[i].IsRevertable())
+ return false;
+
+ Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
+ if (m_xregs[xr].IsLocked())
+ return false;
+ if (m_xregs[xr].Contents() != i)
+ return false;
+ break;
+ }
+ }
+ }
+ return true;
+}
+
+RCOpArg RegCache::Use(preg_t preg, RCMode mode)
+{
+ m_constraints[preg].AddUse(mode);
+ return RCOpArg{this, preg};
+}
+
+RCOpArg RegCache::UseNoImm(preg_t preg, RCMode mode)
+{
+ m_constraints[preg].AddUseNoImm(mode);
+ return RCOpArg{this, preg};
+}
+
+RCOpArg RegCache::BindOrImm(preg_t preg, RCMode mode)
+{
+ m_constraints[preg].AddBindOrImm(mode);
+ return RCOpArg{this, preg};
+}
+
+RCX64Reg RegCache::Bind(preg_t preg, RCMode mode)
+{
+ m_constraints[preg].AddBind(mode);
+ return RCX64Reg{this, preg};
+}
+
+RCX64Reg RegCache::RevertableBind(preg_t preg, RCMode mode)
+{
+ m_constraints[preg].AddRevertableBind(mode);
+ return RCX64Reg{this, preg};
+}
+
+RCX64Reg RegCache::Scratch()
+{
+ return Scratch(GetFreeXReg());
+}
+
+RCX64Reg RegCache::Scratch(X64Reg xr)
+{
+ FlushX(xr);
+ return RCX64Reg{this, xr};
+}
+
+RCForkGuard RegCache::Fork()
+{
+ return RCForkGuard{*this};
+}
+
+void RegCache::Flush(BitSet32 pregs)
+{
+ ASSERT_MSG(
+ DYNA_REC,
+ std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
+ "Someone forgot to unlock a X64 reg");
+
+ for (preg_t i : pregs)
+ {
+ ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(),
+ "Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i));
+ ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
+
+ switch (m_regs[i].GetLocationType())
+ {
+ case PPCCachedReg::LocationType::Default:
+ break;
+ case PPCCachedReg::LocationType::SpeculativeImmediate:
+ // We can have a cached value without a host register through speculative constants.
+ // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
+ // if PPCSTATE is modified externally (e.g. fallback to interpreter).
+ m_regs[i].SetFlushed();
+ break;
+ case PPCCachedReg::LocationType::Bound:
+ case PPCCachedReg::LocationType::Immediate:
+ StoreFromRegister(i);
+ break;
+ }
+ }
+}
+
+void RegCache::Revert()
+{
+ ASSERT(IsAllUnlocked());
+ for (auto& reg : m_regs)
+ {
+ if (reg.IsRevertable())
+ reg.SetRevert();
+ }
+}
+
+void RegCache::Commit()
+{
+ ASSERT(IsAllUnlocked());
+ for (auto& reg : m_regs)
+ {
+ if (reg.IsRevertable())
+ reg.SetCommit();
+ }
+}
+
+bool RegCache::IsAllUnlocked() const
+{
+ return std::none_of(m_regs.begin(), m_regs.end(), [](const auto& r) { return r.IsLocked(); }) &&
+ std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }) &&
+ !IsAnyConstraintActive();
+}
+
+void RegCache::PreloadRegisters(BitSet32 to_preload)
+{
+ for (preg_t preg : to_preload)
+ {
+ if (NumFreeRegisters() < 2)
+ return;
+ if (!R(preg).IsImm())
+ BindToRegister(preg, true, false);
+ }
+}
+
+BitSet32 RegCache::RegistersInUse() const
+{
+ BitSet32 result;
+ for (size_t i = 0; i < m_xregs.size(); i++)
+ {
+ if (!m_xregs[i].IsFree())
+ result[i] = true;
+ }
+ return result;
+}
+
+void RegCache::FlushX(X64Reg reg)
+{
+ ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
+ ASSERT(!m_xregs[reg].IsLocked());
+ if (!m_xregs[reg].IsFree())
+ {
+ StoreFromRegister(m_xregs[reg].Contents());
+ }
+}
+
+void RegCache::DiscardRegContentsIfCached(preg_t preg)
+{
+ if (m_regs[preg].IsBound())
+ {
+ X64Reg xr = m_regs[preg].Location().GetSimpleReg();
+ m_xregs[xr].SetFlushed();
+ m_regs[preg].SetFlushed();
+ }
+}
+
+void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
+{
+ if (!m_regs[i].IsBound())
+ {
+ X64Reg xr = GetFreeXReg();
+
+ ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
+ ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
+ ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state");
+
+ m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
+
+ if (doLoad)
+ {
+ LoadRegister(i, xr);
+ }
+
+ ASSERT_MSG(DYNA_REC,
+ std::none_of(m_regs.begin(), m_regs.end(),
+ [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
+ "Xreg %i already bound", xr);
+
+ m_regs[i].SetBoundTo(xr);
+ }
+ else
+ {
+ // reg location must be simplereg; memory locations
+ // and immediates are taken care of above.
+ if (makeDirty)
+ m_xregs[RX(i)].MakeDirty();
+ }
+
+ ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
+}
+
+void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
+{
+ // When a transaction is in progress, allowing the store would overwrite the old value.
+ ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
+
+ bool doStore = false;
+
+ switch (m_regs[i].GetLocationType())
+ {
+ case PPCCachedReg::LocationType::Default:
+ case PPCCachedReg::LocationType::SpeculativeImmediate:
+ return;
+ case PPCCachedReg::LocationType::Bound:
+ {
+ X64Reg xr = RX(i);
+ doStore = m_xregs[xr].IsDirty();
+ if (mode == FlushMode::Full)
+ m_xregs[xr].SetFlushed();
+ break;
+ }
+ case PPCCachedReg::LocationType::Immediate:
+ doStore = true;
+ break;
+ }
+
+ if (doStore)
+ StoreRegister(i, GetDefaultLocation(i));
+ if (mode == FlushMode::Full)
+ m_regs[i].SetFlushed();
+}
+
+X64Reg RegCache::GetFreeXReg()
+{
+ size_t aCount;
+ const X64Reg* aOrder = GetAllocationOrder(&aCount);
+ for (size_t i = 0; i < aCount; i++)
+ {
+ X64Reg xr = aOrder[i];
+ if (m_xregs[xr].IsFree())
+ {
+ return xr;
+ }
+ }
+
+ // Okay, not found; run the register allocator heuristic and figure out which register we should
+ // clobber.
+ float min_score = std::numeric_limits<float>::max();
+ X64Reg best_xreg = INVALID_REG;
+ size_t best_preg = 0;
+ for (size_t i = 0; i < aCount; i++)
+ {
+ X64Reg xreg = (X64Reg)aOrder[i];
+ preg_t preg = m_xregs[xreg].Contents();
+ if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
+ continue;
+ float score = ScoreRegister(xreg);
+ if (score < min_score)
+ {
+ min_score = score;
+ best_xreg = xreg;
+ best_preg = preg;
+ }
+ }
+
+ if (best_xreg != INVALID_REG)
+ {
+ StoreFromRegister(best_preg);
+ return best_xreg;
+ }
+
+ // Still no dice? Die!
+ ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
+ return INVALID_REG;
+}
+
+int RegCache::NumFreeRegisters() const
+{
+ int count = 0;
+ size_t aCount;
+ const X64Reg* aOrder = GetAllocationOrder(&aCount);
+ for (size_t i = 0; i < aCount; i++)
+ if (m_xregs[aOrder[i]].IsFree())
+ count++;
+ return count;
+}
+
+// Estimate roughly how bad it would be to de-allocate this register. Higher score
+// means more bad.
+float RegCache::ScoreRegister(X64Reg xreg) const
+{
+ preg_t preg = m_xregs[xreg].Contents();
+ float score = 0;
+
+ // If it's not dirty, we don't need a store to write it back to the register file, so
+ // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
+ // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
+ // to the number of extra stores it causes.
+ if (m_xregs[xreg].IsDirty())
+ score += 2;
+
+ // If the register isn't actually needed in a physical register for a later instruction,
+ // writing it back to the register file isn't quite as bad.
+ if (GetRegUtilization()[preg])
+ {
+ // Don't look too far ahead; we don't want to have quadratic compilation times for
+ // enormous block sizes!
+ // This actually improves register allocation a tiny bit; I'm not sure why.
+ u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
+ // Count how many other registers are going to be used before we need this one again.
+ u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
+ // Totally ad-hoc heuristic to bias based on how many other registers we'll need
+ // before this one gets used again.
+ score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
+ }
+
+ return score;
+}
+
+const OpArg& RegCache::R(preg_t preg) const
+{
+ return m_regs[preg].Location();
+}
+
+X64Reg RegCache::RX(preg_t preg) const
+{
+ ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
+ return m_regs[preg].Location().GetSimpleReg();
+}
+
+void RegCache::Lock(preg_t preg)
+{
+ m_regs[preg].Lock();
+}
+
+void RegCache::Unlock(preg_t preg)
+{
+ m_regs[preg].Unlock();
+ if (!m_regs[preg].IsLocked())
+ {
+ // Fully unlocked, reset realization state.
+ m_constraints[preg] = {};
+ }
+}
+
+void RegCache::LockX(X64Reg xr)
+{
+ m_xregs[xr].Lock();
+}
+
+void RegCache::UnlockX(X64Reg xr)
+{
+ m_xregs[xr].Unlock();
+}
+
+bool RegCache::IsRealized(preg_t preg) const
+{
+ return m_constraints[preg].IsRealized();
+}
+
+void RegCache::Realize(preg_t preg)
+{
+ if (m_constraints[preg].IsRealized())
+ return;
+
+ const bool load = m_constraints[preg].ShouldLoad();
+ const bool dirty = m_constraints[preg].ShouldDirty();
+ const bool kill_imm = m_constraints[preg].ShouldKillImmediate();
+ const bool kill_mem = m_constraints[preg].ShouldKillMemory();
+
+ const auto do_bind = [&] {
+ BindToRegister(preg, load, dirty);
+ m_constraints[preg].Realized(RCConstraint::RealizedLoc::Bound);
+ };
+
+ if (m_constraints[preg].ShouldBeRevertable())
+ {
+ StoreFromRegister(preg, FlushMode::MaintainState);
+ do_bind();
+ m_regs[preg].SetRevertable();
+ return;
+ }
+
+ switch (m_regs[preg].GetLocationType())
+ {
+ case PPCCachedReg::LocationType::Default:
+ if (kill_mem)
+ {
+ do_bind();
+ return;
+ }
+ m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
+ return;
+ case PPCCachedReg::LocationType::Bound:
+ do_bind();
+ return;
+ case PPCCachedReg::LocationType::Immediate:
+ case PPCCachedReg::LocationType::SpeculativeImmediate:
+ if (dirty || kill_imm)
+ {
+ do_bind();
+ return;
+ }
+ m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm);
+ break;
+ }
+}
+
+bool RegCache::IsAnyConstraintActive() const
+{
+ return std::any_of(m_constraints.begin(), m_constraints.end(),
+ [](const auto& c) { return c.IsActive(); });
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h
new file mode 100644
index 0000000000..522eb513bb
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h
@@ -0,0 +1,222 @@
+// Copyright 2008 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <type_traits>
+#include <variant>
+
+#include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
+#include "Core/PowerPC/PPCAnalyst.h"
+
+class Jit64;
+enum class RCMode;
+
+class RCOpArg;
+class RCX64Reg;
+class RegCache;
+
+using preg_t = size_t;
+static constexpr size_t NUM_XREGS = 16;
+
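+// RCOpArg and RCX64Reg are RAII handles over cache entries: constructing one
+// records a constraint and takes a lock, RegCache::Realize() materializes the
+// constraints, and destruction releases the lock. This replaces the manual
+// Lock()/UnlockAll() pairing removed elsewhere in this diff.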
+class RCOpArg
+{
+public:
+ static RCOpArg Imm32(u32 imm);
+ static RCOpArg R(Gen::X64Reg xr);
+ RCOpArg();
+ ~RCOpArg();
+ RCOpArg(RCOpArg&&) noexcept;
+ RCOpArg& operator=(RCOpArg&&) noexcept;
+
+ RCOpArg(RCX64Reg&&) noexcept;
+ RCOpArg& operator=(RCX64Reg&&) noexcept;
+
+ RCOpArg(const RCOpArg&) = delete;
+ RCOpArg& operator=(const RCOpArg&) = delete;
+
+ void Realize();
+ Gen::OpArg Location() const;
+ operator Gen::OpArg() const & { return Location(); }
+ operator Gen::OpArg() const && = delete;
+ bool IsSimpleReg() const { return Location().IsSimpleReg(); }
+ bool IsSimpleReg(Gen::X64Reg reg) const { return Location().IsSimpleReg(reg); }
+ Gen::X64Reg GetSimpleReg() const { return Location().GetSimpleReg(); }
+
+ // Use to extract bytes from a register using the regcache. offset is in bytes.
+ Gen::OpArg ExtractWithByteOffset(int offset);
+
+ void Unlock();
+
+ bool IsImm() const;
+ s32 SImm32() const;
+ u32 Imm32() const;
+ bool IsZero() const { return IsImm() && Imm32() == 0; }
+
+private:
+ friend class RegCache;
+
+ explicit RCOpArg(u32 imm);
+ explicit RCOpArg(Gen::X64Reg xr);
+ RCOpArg(RegCache* rc_, preg_t preg);
+
+ RegCache* rc = nullptr;
+ std::variant<std::monostate, Gen::X64Reg, preg_t, u32> contents;
+};
+
+class RCX64Reg
+{
+public:
+ RCX64Reg();
+ ~RCX64Reg();
+ RCX64Reg(RCX64Reg&&) noexcept;
+ RCX64Reg& operator=(RCX64Reg&&) noexcept;
+
+ RCX64Reg(const RCX64Reg&) = delete;
+ RCX64Reg& operator=(const RCX64Reg&) = delete;
+
+ void Realize();
+ operator Gen::OpArg() const &;
+ operator Gen::X64Reg() const &;
+ operator Gen::OpArg() const && = delete;
+ operator Gen::X64Reg() const && = delete;
+
+ void Unlock();
+
+private:
+ friend class RegCache;
+ friend class RCOpArg;
+
+ RCX64Reg(RegCache* rc_, preg_t preg);
+ RCX64Reg(RegCache* rc_, Gen::X64Reg xr);
+
+ RegCache* rc = nullptr;
+ std::variant<std::monostate, Gen::X64Reg, preg_t> contents;
+};
+
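+// Snapshots the cache state so speculatively emitted code can be abandoned.
+// A minimal usage sketch (assumed, based on EndFork() below):
+//   RCForkGuard guard = gpr.Fork();
+//   ... emit code for a speculative path ...
+//   guard.EndFork();  // restores the cache to the snapshot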
+class RCForkGuard
+{
+public:
+ ~RCForkGuard() { EndFork(); }
+ RCForkGuard(RCForkGuard&&) noexcept;
+
+ RCForkGuard(const RCForkGuard&) = delete;
+ RCForkGuard& operator=(const RCForkGuard&) = delete;
+ RCForkGuard& operator=(RCForkGuard&&) = delete;
+
+ void EndFork();
+
+private:
+ friend class RegCache;
+
+ explicit RCForkGuard(RegCache& rc_);
+
+ RegCache* rc;
+ std::array<PPCCachedReg, 32> m_regs;
+ std::array<X64CachedReg, NUM_XREGS> m_xregs;
+};
+
+class RegCache
+{
+public:
+ enum class FlushMode
+ {
+ Full,
+ MaintainState,
+ };
+
+ explicit RegCache(Jit64& jit);
+ virtual ~RegCache() = default;
+
+ void Start();
+ void SetEmitter(Gen::XEmitter* emitter);
+ bool SanityCheck() const;
+
+ template <typename... Ts>
+ static void Realize(Ts&... rc)
+ {
+ static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
+ (rc.Realize(), ...);
+ }
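+
+ // Typical call pattern, as seen at the emitter call sites in this change:
+ //   RCOpArg Ra = gpr.Use(a, RCMode::Read);
+ //   RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+ //   RegCache::Realize(Ra, Rd);
+ //   MOV(32, Rd, Ra);
+ // The locks taken by Use()/Bind() are released when Ra and Rd are destroyed.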
+
+ template <typename... Ts>
+ static void Unlock(Ts&... rc)
+ {
+ static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
+ (rc.Unlock(), ...);
+ }
+
+ template <typename... Args>
+ bool IsImm(Args... pregs) const
+ {
+ static_assert(sizeof...(pregs) > 0);
+ return (R(pregs).IsImm() && ...);
+ }
+ u32 Imm32(preg_t preg) const { return R(preg).Imm32(); }
+ s32 SImm32(preg_t preg) const { return R(preg).SImm32(); }
+
+ RCOpArg Use(preg_t preg, RCMode mode);
+ RCOpArg UseNoImm(preg_t preg, RCMode mode);
+ RCOpArg BindOrImm(preg_t preg, RCMode mode);
+ RCX64Reg Bind(preg_t preg, RCMode mode);
+ RCX64Reg RevertableBind(preg_t preg, RCMode mode);
+ RCX64Reg Scratch();
+ RCX64Reg Scratch(Gen::X64Reg xr);
+
+ RCForkGuard Fork();
+ void Flush(BitSet32 pregs = BitSet32::AllTrue(32));
+ void Revert();
+ void Commit();
+
+ bool IsAllUnlocked() const;
+
+ void PreloadRegisters(BitSet32 pregs);
+ BitSet32 RegistersInUse() const;
+
+protected:
+ friend class RCOpArg;
+ friend class RCX64Reg;
+ friend class RCForkGuard;
+
+ virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
+ virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
+ virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
+
+ virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;
+
+ virtual BitSet32 GetRegUtilization() const = 0;
+ virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
+
+ void FlushX(Gen::X64Reg reg);
+ void DiscardRegContentsIfCached(preg_t preg);
+ void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
+ void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full);
+
+ Gen::X64Reg GetFreeXReg();
+
+ int NumFreeRegisters() const;
+ float ScoreRegister(Gen::X64Reg xreg) const;
+
+ const Gen::OpArg& R(preg_t preg) const;
+ Gen::X64Reg RX(preg_t preg) const;
+
+ void Lock(preg_t preg);
+ void Unlock(preg_t preg);
+ void LockX(Gen::X64Reg xr);
+ void UnlockX(Gen::X64Reg xr);
+ bool IsRealized(preg_t preg) const;
+ void Realize(preg_t preg);
+
+ bool IsAnyConstraintActive() const;
+
+ Jit64& m_jit;
+ std::array<PPCCachedReg, 32> m_regs;
+ std::array<X64CachedReg, NUM_XREGS> m_xregs;
+ std::array<RCConstraint, 32> m_constraints;
+ Gen::XEmitter* m_emitter = nullptr;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h b/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h
new file mode 100644
index 0000000000..efe72ac4f5
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/RCMode.h
@@ -0,0 +1,12 @@
+// Copyright 2018 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+enum class RCMode
+{
+ Read,
+ Write,
+ ReadWrite,
+};
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index 90cd71ac87..062caee096 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -70,11 +70,6 @@ protected:
// so just fixup that branch instead of testing for a DSI again.
bool fixupExceptionHandler;
Gen::FixupBranch exceptionHandler;
- // If these are set, we've stored the old value of a register which will be loaded in
- // revertLoad,
- // which lets us revert it on the exception path.
- int revertGprLoad;
- int revertFprLoad;
bool assumeNoPairedQuantize;
std::map constantGqr;