diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 0d76fb8c47..a1bb58f238 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -196,6 +196,8 @@ if(_M_X86)
PowerPC/Jit64IL/IR_X86.cpp
PowerPC/Jit64IL/JitIL.cpp
PowerPC/Jit64IL/JitIL_Tables.cpp
+ PowerPC/Jit64/FPURegCache.cpp
+ PowerPC/Jit64/GPRRegCache.cpp
PowerPC/Jit64/Jit64_Tables.cpp
PowerPC/Jit64/JitAsm.cpp
PowerPC/Jit64/Jit_Branch.cpp
diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj
index 47845d7b3d..13492f654f 100644
--- a/Source/Core/Core/Core.vcxproj
+++ b/Source/Core/Core/Core.vcxproj
@@ -225,6 +225,8 @@
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
@@ -427,6 +429,8 @@
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters
index ea1040901e..00023e98f4 100644
--- a/Source/Core/Core/Core.vcxproj.filters
+++ b/Source/Core/Core/Core.vcxproj.filters
@@ -666,6 +666,12 @@
       <Filter>PowerPC\JitIL</Filter>
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
       <Filter>PowerPC\Jit64</Filter>
@@ -1262,6 +1268,12 @@
       <Filter>PowerPC\JitIL</Filter>
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
       <Filter>PowerPC\Jit64</Filter>
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
new file mode 100644
index 0000000000..698f7da640
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
@@ -0,0 +1,53 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
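+// Guest FPRs are cached in full XMM registers; MOVAPD copies all 16 bytes, so both
+// halves of a paired single are spilled or reloaded with a single move.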
+void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+ emit->MOVAPD(new_loc, regs[preg].location.GetSimpleReg());
+}
+
+void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+ emit->MOVAPD(new_loc, regs[preg].location);
+}
+
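+// XMM0 and XMM1 are left out of the pool, presumably because the Jit uses them as
+// scratch registers; XMM6-XMM15 (callee-saved under the Windows ABI) are tried first
+// so cached values are more likely to survive calls out of generated code.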
+const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
+{
+ static const X64Reg allocation_order[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
+ XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
+ *count = sizeof(allocation_order) / sizeof(X64Reg);
+ return allocation_order;
+}
+
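+// When not bound to an XMM register, a guest FPR lives in the PowerPC state block;
+// ps[reg][0] is the first element of the paired-single pair.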
+OpArg FPURegCache::GetDefaultLocation(size_t reg) const
+{
+ return PPCSTATE(ps[reg][0]);
+}
+
+BitSet32 FPURegCache::GetRegUtilization()
+{
+ return jit->js.op->gprInReg;
+}
+
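+// Collects which guest FPRs the next `lookahead` instructions read, stopping once
+// preg itself is about to be used again.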
+BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+ BitSet32 regs_used;
+
+ for (u32 i = 1; i < lookahead; i++)
+ {
+ BitSet32 regs_in = jit->js.op[i].fregsIn;
+ regs_used |= regs_in;
+ if (regs_in[preg])
+ return regs_used;
+ }
+
+ return regs_used;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h
new file mode 100644
index 0000000000..0ea2faf2f8
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h
@@ -0,0 +1,18 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class FPURegCache final : public RegCache
+{
+public:
+ void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
+ void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
+ const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+ Gen::OpArg GetDefaultLocation(size_t reg) const override;
+ BitSet32 GetRegUtilization() override;
+ BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
new file mode 100644
index 0000000000..c71e4471e8
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
+
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
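+// Guest GPRs are 32-bit, so 32-bit MOVs are sufficient to spill and reload them.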
+void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+ emit->MOV(32, new_loc, regs[preg].location);
+}
+
+void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+ emit->MOV(32, ::Gen::R(new_loc), regs[preg].location);
+}
+
+OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
+{
+ return PPCSTATE(gpr[reg]);
+}
+
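+// Callee-saved registers under each ABI come first so cached guest registers are more
+// likely to survive calls out of the Jit; registers that appear to be reserved for the
+// Jit's own use (e.g. RAX/RDX as scratch) are never handed out here.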
+const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
+{
+ static const X64Reg allocation_order[] = {
+// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
+// this.
+#ifdef _WIN32
+ RSI, RDI, R13, R14, R15, R8,
+ R9, R10, R11, R12, RCX
+#else
+ R12, R13, R14, R15, RSI, RDI,
+ R8, R9, R10, R11, RCX
+#endif
+ };
+ *count = sizeof(allocation_order) / sizeof(X64Reg);
+ return allocation_order;
+}
+
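+// Binds preg to a 32-bit immediate instead of an x64 register so later uses can fold
+// the constant straight into generated code. A hypothetical example: after "li r5, 0"
+// the compiler could call gpr.SetImmediate32(5, 0), and subsequent reads of r5 become
+// Imm32(0) instead of loads from ppcState.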
+void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty)
+{
+ // "dirty" can be false to avoid redundantly flushing an immediate when
+ // processing speculative constants.
+ DiscardRegContentsIfCached(preg);
+ regs[preg].away |= dirty;
+ regs[preg].location = Imm32(imm_value);
+}
+
+BitSet32 GPRRegCache::GetRegUtilization()
+{
+ return jit->js.op->gprInReg;
+}
+
+BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+ BitSet32 regs_used;
+
+ for (u32 i = 1; i < lookahead; i++)
+ {
+ BitSet32 regs_in = jit->js.op[i].regsIn;
+ regs_used |= regs_in;
+ if (regs_in[preg])
+ return regs_used;
+ }
+
+ return regs_used;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
new file mode 100644
index 0000000000..7032254611
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class GPRRegCache final : public RegCache
+{
+public:
+ void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override;
+ void LoadRegister(size_t preg, Gen::X64Reg new_loc) override;
+ Gen::OpArg GetDefaultLocation(size_t reg) const override;
+ const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+ void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true);
+ BitSet32 GetRegUtilization() override;
+ BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 588d776fab..f2318971c2 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -21,6 +21,8 @@
#include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
index 278d6a126c..c99f8511c7 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
@@ -14,7 +14,6 @@
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"
using namespace Gen;
@@ -57,128 +56,17 @@ void RegCache::Start()
// But only preload IF written OR reads >= 3
}
-void RegCache::UnlockAll()
+void RegCache::DiscardRegContentsIfCached(size_t preg)
{
- for (auto& reg : regs)
- reg.locked = false;
-}
-
-void RegCache::UnlockAllX()
-{
- for (auto& xreg : xregs)
- xreg.locked = false;
-}
-
-BitSet32 GPRRegCache::GetRegUtilization()
-{
- return jit->js.op->gprInReg;
-}
-
-BitSet32 FPURegCache::GetRegUtilization()
-{
- return jit->js.op->gprInReg;
-}
-
-BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
- BitSet32 regsUsed;
- for (u32 i = 1; i < lookahead; i++)
+ if (IsBound(preg))
{
- BitSet32 regsIn = jit->js.op[i].regsIn;
- regsUsed |= regsIn;
- if (regsIn[preg])
- return regsUsed;
+ X64Reg xr = regs[preg].location.GetSimpleReg();
+ xregs[xr].free = true;
+ xregs[xr].dirty = false;
+ xregs[xr].ppcReg = INVALID_REG;
+ regs[preg].away = false;
+ regs[preg].location = GetDefaultLocation(preg);
}
- return regsUsed;
-}
-
-BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
- BitSet32 regsUsed;
- for (u32 i = 1; i < lookahead; i++)
- {
- BitSet32 regsIn = jit->js.op[i].fregsIn;
- regsUsed |= regsIn;
- if (regsIn[preg])
- return regsUsed;
- }
- return regsUsed;
-}
-
-// Estimate roughly how bad it would be to de-allocate this register. Higher score
-// means more bad.
-float RegCache::ScoreRegister(X64Reg xr)
-{
- size_t preg = xregs[xr].ppcReg;
- float score = 0;
-
- // If it's not dirty, we don't need a store to write it back to the register file, so
- // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
- // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
- // to the number of extra stores it causes.
- if (xregs[xr].dirty)
- score += 2;
-
- // If the register isn't actually needed in a physical register for a later instruction,
- // writing it back to the register file isn't quite as bad.
- if (GetRegUtilization()[preg])
- {
- // Don't look too far ahead; we don't want to have quadratic compilation times for
- // enormous block sizes!
- // This actually improves register allocation a tiny bit; I'm not sure why.
- u32 lookahead = std::min(jit->js.instructionsLeft, 64);
- // Count how many other registers are going to be used before we need this one again.
- u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
- // Totally ad-hoc heuristic to bias based on how many other registers we'll need
- // before this one gets used again.
- score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
- }
-
- return score;
-}
-
-X64Reg RegCache::GetFreeXReg()
-{
- size_t aCount;
- const X64Reg* aOrder = GetAllocationOrder(&aCount);
- for (size_t i = 0; i < aCount; i++)
- {
- X64Reg xr = aOrder[i];
- if (!xregs[xr].locked && xregs[xr].free)
- {
- return xr;
- }
- }
-
- // Okay, not found; run the register allocator heuristic and figure out which register we should
- // clobber.
- float min_score = std::numeric_limits<float>::max();
- X64Reg best_xreg = INVALID_REG;
- size_t best_preg = 0;
- for (size_t i = 0; i < aCount; i++)
- {
- X64Reg xreg = (X64Reg)aOrder[i];
- size_t preg = xregs[xreg].ppcReg;
- if (xregs[xreg].locked || regs[preg].locked)
- continue;
- float score = ScoreRegister(xreg);
- if (score < min_score)
- {
- min_score = score;
- best_xreg = xreg;
- best_preg = preg;
- }
- }
-
- if (best_xreg != INVALID_REG)
- {
- StoreFromRegister(best_preg);
- return best_xreg;
- }
-
- // Still no dice? Die!
- _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
- return INVALID_REG;
}
void RegCache::FlushR(X64Reg reg)
@@ -191,6 +79,35 @@ void RegCache::FlushR(X64Reg reg)
}
}
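+// Writes the requested guest registers back to ppcState; depending on the flush mode
+// the x64 binding is either released or kept alive for further use.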
+void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
+{
+ for (size_t i = 0; i < xregs.size(); i++)
+ {
+ if (xregs[i].locked)
+ PanicAlert("Someone forgot to unlock X64 reg %zu", i);
+ }
+
+ for (unsigned int i : regsToFlush)
+ {
+ if (regs[i].locked)
+ {
+ PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
+ }
+
+ if (regs[i].away)
+ {
+ if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
+ {
+ StoreFromRegister(i, mode);
+ }
+ else
+ {
+ _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
+ }
+ }
+ }
+}
+
int RegCache::SanityCheck() const
{
for (size_t i = 0; i < regs.size(); i++)
@@ -214,63 +131,6 @@ int RegCache::SanityCheck() const
return 0;
}
-void RegCache::DiscardRegContentsIfCached(size_t preg)
-{
- if (IsBound(preg))
- {
- X64Reg xr = regs[preg].location.GetSimpleReg();
- xregs[xr].free = true;
- xregs[xr].dirty = false;
- xregs[xr].ppcReg = INVALID_REG;
- regs[preg].away = false;
- regs[preg].location = GetDefaultLocation(preg);
- }
-}
-
-void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
-{
- // "dirty" can be false to avoid redundantly flushing an immediate when
- // processing speculative constants.
- DiscardRegContentsIfCached(preg);
- regs[preg].away |= dirty;
- regs[preg].location = Imm32(immValue);
-}
-
-const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
-{
- static const X64Reg allocationOrder[] = {
-// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
-// this.
-#ifdef _WIN32
- RSI, RDI, R13, R14, R15, R8,
- R9, R10, R11, R12, RCX
-#else
- R12, R13, R14, R15, RSI, RDI,
- R8, R9, R10, R11, RCX
-#endif
- };
- *count = sizeof(allocationOrder) / sizeof(X64Reg);
- return allocationOrder;
-}
-
-const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
-{
- static const X64Reg allocationOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
- XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
- *count = sizeof(allocationOrder) / sizeof(X64Reg);
- return allocationOrder;
-}
-
-OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
-{
- return PPCSTATE(gpr[reg]);
-}
-
-OpArg FPURegCache::GetDefaultLocation(size_t reg) const
-{
- return PPCSTATE(ps[reg][0]);
-}
-
void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
{
if (regs[preg].away)
@@ -351,53 +211,60 @@ void RegCache::StoreFromRegister(size_t i, FlushMode mode)
}
}
-void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
+void RegCache::UnlockAll()
{
- emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
+ for (auto& reg : regs)
+ reg.locked = false;
}
-void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc)
+void RegCache::UnlockAllX()
{
- emit->MOV(32, newLoc, regs[preg].location);
+ for (auto& xreg : xregs)
+ xreg.locked = false;
}
-void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc)
+X64Reg RegCache::GetFreeXReg()
{
- emit->MOVAPD(newLoc, regs[preg].location);
-}
-
-void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc)
-{
- emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
-}
-
-void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
-{
- for (size_t i = 0; i < xregs.size(); i++)
+ size_t aCount;
+ const X64Reg* aOrder = GetAllocationOrder(&aCount);
+ for (size_t i = 0; i < aCount; i++)
{
- if (xregs[i].locked)
- PanicAlert("Someone forgot to unlock X64 reg %zu", i);
- }
-
- for (unsigned int i : regsToFlush)
- {
- if (regs[i].locked)
+ X64Reg xr = aOrder[i];
+ if (!xregs[xr].locked && xregs[xr].free)
{
- PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
- }
-
- if (regs[i].away)
- {
- if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
- {
- StoreFromRegister(i, mode);
- }
- else
- {
- _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
- }
+ return xr;
}
}
+
+ // Okay, not found; run the register allocator heuristic and figure out which register we should
+ // clobber.
+ float min_score = std::numeric_limits<float>::max();
+ X64Reg best_xreg = INVALID_REG;
+ size_t best_preg = 0;
+ for (size_t i = 0; i < aCount; i++)
+ {
+ X64Reg xreg = (X64Reg)aOrder[i];
+ size_t preg = xregs[xreg].ppcReg;
+ if (xregs[xreg].locked || regs[preg].locked)
+ continue;
+ float score = ScoreRegister(xreg);
+ if (score < min_score)
+ {
+ min_score = score;
+ best_xreg = xreg;
+ best_preg = preg;
+ }
+ }
+
+ if (best_xreg != INVALID_REG)
+ {
+ StoreFromRegister(best_preg);
+ return best_xreg;
+ }
+
+ // Still no dice? Die!
+ _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
+ return INVALID_REG;
}
int RegCache::NumFreeRegisters()
@@ -410,3 +277,35 @@ int RegCache::NumFreeRegisters()
count++;
return count;
}
+
+// Estimate roughly how bad it would be to de-allocate this register. Higher score
+// means more bad.
+float RegCache::ScoreRegister(X64Reg xr)
+{
+ size_t preg = xregs[xr].ppcReg;
+ float score = 0;
+
+ // If it's not dirty, we don't need a store to write it back to the register file, so
+ // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
+ // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
+ // to the number of extra stores it causes.
+ if (xregs[xr].dirty)
+ score += 2;
+
+ // If the register isn't actually needed in a physical register for a later instruction,
+ // writing it back to the register file isn't quite as bad.
+ if (GetRegUtilization()[preg])
+ {
+ // Don't look too far ahead; we don't want to have quadratic compilation times for
+ // enormous block sizes!
+ // This actually improves register allocation a tiny bit; I'm not sure why.
+ u32 lookahead = std::min(jit->js.instructionsLeft, 64);
+ // Count how many other registers are going to be used before we need this one again.
+ u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
+ // Totally ad-hoc heuristic to bias based on how many other registers we'll need
+ // before this one gets used again.
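+ // For reference: regs_in_count == 0 yields a bias of 11, 3 yields 7, 31 yields 1,
+ // and larger counts drift slightly negative.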
+ score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
+ }
+
+ return score;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
index f47e57e377..41fab3d79e 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
@@ -153,26 +153,3 @@ public:
Gen::X64Reg GetFreeXReg();
int NumFreeRegisters();
};
-
-class GPRRegCache final : public RegCache
-{
-public:
- void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
- void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
- Gen::OpArg GetDefaultLocation(size_t reg) const override;
- const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
- void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
- BitSet32 GetRegUtilization() override;
- BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};
-
-class FPURegCache final : public RegCache
-{
-public:
- void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
- void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
- const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
- Gen::OpArg GetDefaultLocation(size_t reg) const override;
- BitSet32 GetRegUtilization() override;
- BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};