Merge pull request #4535 from lioncash/regcache

Jit64: Make register caches not rely on the global jit variable
This commit is contained in:
Mat M 2017-01-01 18:01:29 -05:00 committed by GitHub
commit b10a0de769
11 changed files with 484 additions and 378 deletions
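
The diff below removes the register caches' reliance on the global jit pointer: RegCache now receives its owning Jit64 through the constructor and keeps it as m_jit, the GPR and FPU caches move into files of their own, and data members gain m_ prefixes. A minimal sketch of the injection pattern being applied (simplified, hypothetical class bodies; the real declarations follow in the diff):

class Jit64;  // owning compiler, forward-declared as in the real header

class RegCache
{
public:
  explicit RegCache(Jit64& jit) : m_jit{jit} {}
  virtual ~RegCache() = default;

protected:
  Jit64& m_jit;  // injected owner; replaces reads of the global jit pointer
};

class GPRRegCache final : public RegCache
{
public:
  explicit GPRRegCache(Jit64& jit) : RegCache{jit} {}
};

// Inside the owner (see Jit.h below): GPRRegCache gpr{*this};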

View File

@@ -196,6 +196,8 @@ if(_M_X86)
PowerPC/Jit64IL/IR_X86.cpp
PowerPC/Jit64IL/JitIL.cpp
PowerPC/Jit64IL/JitIL_Tables.cpp
+ PowerPC/Jit64/FPURegCache.cpp
+ PowerPC/Jit64/GPRRegCache.cpp
PowerPC/Jit64/Jit64_Tables.cpp
PowerPC/Jit64/JitAsm.cpp
PowerPC/Jit64/Jit_Branch.cpp

View File

@@ -225,6 +225,8 @@
<ClCompile Include="PowerPC\Jit64IL\IR_X86.cpp" />
<ClCompile Include="PowerPC\Jit64IL\JitIL.cpp" />
<ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp" />
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
@@ -427,6 +429,8 @@
<ClInclude Include="PowerPC\Interpreter\Interpreter_Tables.h" />
<ClInclude Include="PowerPC\Jit64IL\JitIL.h" />
<ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h" />
<ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
<ClInclude Include="PowerPC\Jit64\Jit.h" />
<ClInclude Include="PowerPC\Jit64\Jit64_Tables.h" />
<ClInclude Include="PowerPC\Jit64\JitAsm.h" />

View File

@@ -666,6 +666,12 @@
<ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp">
<Filter>PowerPC\JitIL</Filter>
</ClCompile>
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp">
<Filter>PowerPC\Jit64</Filter>
</ClCompile>
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp">
<Filter>PowerPC\Jit64</Filter>
</ClCompile>
<ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp">
<Filter>PowerPC\Jit64</Filter>
</ClCompile>
@@ -1262,6 +1268,12 @@
<ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h">
<Filter>PowerPC\JitIL</Filter>
</ClInclude>
<ClInclude Include="PowerPC\Jit64\FPURegCache.h">
<Filter>PowerPC\Jit64</Filter>
</ClInclude>
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h">
<Filter>PowerPC\Jit64</Filter>
</ClInclude>
<ClInclude Include="PowerPC\Jit64\JitRegCache.h">
<Filter>PowerPC\Jit64</Filter>
</ClInclude>

View File

@@ -0,0 +1,58 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/FPURegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
using namespace Gen;
FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit}
{
}
void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc)
{
m_emitter->MOVAPD(new_loc, m_regs[preg].location.GetSimpleReg());
}
void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc)
{
m_emitter->MOVAPD(new_loc, m_regs[preg].location);
}
const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
{
static const X64Reg allocation_order[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
*count = sizeof(allocation_order) / sizeof(X64Reg);
return allocation_order;
}
OpArg FPURegCache::GetDefaultLocation(size_t reg) const
{
return PPCSTATE(ps[reg][0]);
}
BitSet32 FPURegCache::GetRegUtilization()
{
return m_jit.js.op->gprInReg;
}
BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
{
BitSet32 regs_used;
for (u32 i = 1; i < lookahead; i++)
{
BitSet32 regs_in = m_jit.js.op[i].fregsIn;
regs_used |= regs_in;
if (regs_in[preg])
return regs_used;
}
return regs_used;
}
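
CountRegsIn walks the decoded instruction stream starting one op ahead, accumulating every register read until it reaches the next use of preg; the accumulated set's population count later feeds ScoreRegister. A self-contained sketch of the same accumulation over plain data (hypothetical types, not Dolphin's PPCAnalyst structures):

#include <bitset>
#include <cstdint>
#include <vector>

// Accumulate the registers read by upcoming ops, stopping at the next read
// of `preg`: the same loop shape as FPURegCache::CountRegsIn above.
std::bitset<32> CountRegsInSketch(const std::vector<std::bitset<32>>& reads_per_op,
                                  std::size_t preg, std::uint32_t lookahead)
{
  std::bitset<32> regs_used;
  for (std::uint32_t i = 1; i < lookahead && i < reads_per_op.size(); i++)
  {
    regs_used |= reads_per_op[i];
    if (reads_per_op[i].test(preg))
      return regs_used;  // preg is needed here; stop counting
  }
  return regs_used;
}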

View File

@@ -0,0 +1,22 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Jit64/JitRegCache.h"
class Jit64;
class FPURegCache final : public RegCache
{
public:
explicit FPURegCache(Jit64& jit);
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
Gen::OpArg GetDefaultLocation(size_t reg) const override;
BitSet32 GetRegUtilization() override;
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
};

View File

@@ -0,0 +1,76 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
using namespace Gen;
GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit}
{
}
void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc)
{
m_emitter->MOV(32, new_loc, m_regs[preg].location);
}
void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc)
{
m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].location);
}
OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
{
return PPCSTATE(gpr[reg]);
}
const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
{
static const X64Reg allocation_order[] = {
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
// this.
#ifdef _WIN32
RSI, RDI, R13, R14, R15, R8,
R9, R10, R11, R12, RCX
#else
R12, R13, R14, R15, RSI, RDI,
R8, R9, R10, R11, RCX
#endif
};
*count = sizeof(allocation_order) / sizeof(X64Reg);
return allocation_order;
}
void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty)
{
// "dirty" can be false to avoid redundantly flushing an immediate when
// processing speculative constants.
DiscardRegContentsIfCached(preg);
m_regs[preg].away |= dirty;
m_regs[preg].location = Imm32(imm_value);
}
BitSet32 GPRRegCache::GetRegUtilization()
{
return m_jit.js.op->gprInReg;
}
BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
{
BitSet32 regs_used;
for (u32 i = 1; i < lookahead; i++)
{
BitSet32 regs_in = m_jit.js.op[i].regsIn;
regs_used |= regs_in;
if (regs_in[preg])
return regs_used;
}
return regs_used;
}
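
SetImmediate32 parks a known constant in the cache as an Imm32 operand instead of binding a host register, and dirty = false lets speculative constants be folded into generated code without forcing a store at the next flush, since away stays false. A minimal self-contained sketch of that state transition (hypothetical struct, not the real PPCCachedReg):

#include <cstdint>
#include <optional>

struct CachedRegSketch
{
  std::optional<std::uint32_t> imm;  // engaged -> register is a known constant
  bool away = false;                 // true -> differs from the in-memory copy
};

void SetImmediate32Sketch(CachedRegSketch& reg, std::uint32_t value, bool dirty)
{
  // Non-dirty speculative constants can still be folded, but a later flush
  // sees away == false and skips the redundant write-back.
  reg.away |= dirty;
  reg.imm = value;
}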

View File

@@ -0,0 +1,23 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Jit64/JitRegCache.h"
class Jit64;
class GPRRegCache final : public RegCache
{
public:
explicit GPRRegCache(Jit64& jit);
void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override;
void LoadRegister(size_t preg, Gen::X64Reg new_loc) override;
Gen::OpArg GetDefaultLocation(size_t reg) const override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true);
BitSet32 GetRegUtilization() override;
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
};

View File

@@ -950,7 +950,7 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const
BitSet32 Jit64::CallerSavedRegistersInUse() const
{
BitSet32 result;
- for (int i = 0; i < NUMXREGS; i++)
+ for (size_t i = 0; i < RegCache::NUM_XREGS; i++)
{
if (!gpr.IsFreeX(i))
result[i] = true;

View File

@@ -21,6 +21,8 @@
#include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/FPURegCache.h"
#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
@@ -33,8 +35,8 @@ private:
void AllocStack();
void FreeStack();
- GPRRegCache gpr;
- FPURegCache fpr;
+ GPRRegCache gpr{*this};
+ FPURegCache fpr{*this};
// The default code buffer. We keep it around to not have to alloc/dealloc a
// large chunk of memory for each recompiled block.
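
Initializing gpr and fpr with *this in default member initializers is well-defined here: by the time the members are constructed the Jit64 object's storage exists, and the caches only stash the reference rather than calling back into the not-yet-complete owner. A compilable sketch of the pattern (generic names, not the Dolphin classes):

class Owner;

class Part
{
public:
  explicit Part(Owner& owner) : m_owner{owner} {}  // only stores the reference

private:
  Owner& m_owner;
};

class Owner
{
private:
  Part m_part{*this};  // same shape as: GPRRegCache gpr{*this};
};

int main()
{
  Owner owner;  // m_part is constructed with a reference back to owner
}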

View File

@@ -14,30 +14,29 @@
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"
using namespace Gen;
using namespace PowerPC;
- RegCache::RegCache() : emit(nullptr)
+ RegCache::RegCache(Jit64& jit) : m_jit{jit}
{
}
void RegCache::Start()
{
- for (auto& xreg : xregs)
+ for (auto& xreg : m_xregs)
{
xreg.free = true;
xreg.dirty = false;
xreg.locked = false;
xreg.ppcReg = INVALID_REG;
}
- for (size_t i = 0; i < regs.size(); i++)
+ for (size_t i = 0; i < m_regs.size(); i++)
{
- regs[i].location = GetDefaultLocation(i);
- regs[i].away = false;
- regs[i].locked = false;
+ m_regs[i].location = GetDefaultLocation(i);
+ m_regs[i].away = false;
+ m_regs[i].locked = false;
}
// todo: sort to find the most popular regs
@@ -57,84 +56,220 @@ void RegCache::Start()
// But only preload IF written OR reads >= 3
}
+ void RegCache::DiscardRegContentsIfCached(size_t preg)
+ {
+ if (IsBound(preg))
+ {
+ X64Reg xr = m_regs[preg].location.GetSimpleReg();
+ m_xregs[xr].free = true;
+ m_xregs[xr].dirty = false;
+ m_xregs[xr].ppcReg = INVALID_REG;
+ m_regs[preg].away = false;
+ m_regs[preg].location = GetDefaultLocation(preg);
+ }
+ }
+ void RegCache::SetEmitter(XEmitter* emitter)
+ {
+ m_emitter = emitter;
+ }
+ void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
+ {
+ for (size_t i = 0; i < m_xregs.size(); i++)
+ {
+ if (m_xregs[i].locked)
+ PanicAlert("Someone forgot to unlock X64 reg %zu", i);
+ }
+ for (unsigned int i : regsToFlush)
+ {
+ if (m_regs[i].locked)
+ {
+ PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
+ }
+ if (m_regs[i].away)
+ {
+ if (m_regs[i].location.IsSimpleReg() || m_regs[i].location.IsImm())
+ {
+ StoreFromRegister(i, mode);
+ }
+ else
+ {
+ _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
+ }
+ }
+ }
+ }
+ void RegCache::FlushR(X64Reg reg)
+ {
+ if (reg >= m_xregs.size())
+ PanicAlert("Flushing non existent reg");
+ if (!m_xregs[reg].free)
+ {
+ StoreFromRegister(m_xregs[reg].ppcReg);
+ }
+ }
+ void RegCache::FlushR(X64Reg reg, X64Reg reg2)
+ {
+ FlushR(reg);
+ FlushR(reg2);
+ }
+ void RegCache::FlushLockX(X64Reg reg)
+ {
+ FlushR(reg);
+ LockX(reg);
+ }
+ void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2)
+ {
+ FlushR(reg1);
+ FlushR(reg2);
+ LockX(reg1);
+ LockX(reg2);
+ }
+ int RegCache::SanityCheck() const
+ {
+ for (size_t i = 0; i < m_regs.size(); i++)
+ {
+ if (m_regs[i].away)
+ {
+ if (m_regs[i].location.IsSimpleReg())
+ {
+ Gen::X64Reg simple = m_regs[i].location.GetSimpleReg();
+ if (m_xregs[simple].locked)
+ return 1;
+ if (m_xregs[simple].ppcReg != i)
+ return 2;
+ }
+ else if (m_regs[i].location.IsImm())
+ {
+ return 3;
+ }
+ }
+ }
+ return 0;
+ }
+ void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
+ {
+ if (m_regs[preg].away)
+ {
+ if (m_regs[preg].location.IsImm())
+ BindToRegister(preg, doLoad, makeDirty);
+ else if (m_regs[preg].location.IsSimpleReg())
+ m_xregs[RX(preg)].dirty |= makeDirty;
+ }
+ }
+ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
+ {
+ if (!m_regs[i].away || m_regs[i].location.IsImm())
+ {
+ X64Reg xr = GetFreeXReg();
+ if (m_xregs[xr].dirty)
+ PanicAlert("Xreg already dirty");
+ if (m_xregs[xr].locked)
+ PanicAlert("GetFreeXReg returned locked register");
+ m_xregs[xr].free = false;
+ m_xregs[xr].ppcReg = i;
+ m_xregs[xr].dirty = makeDirty || m_regs[i].away;
+ if (doLoad)
+ LoadRegister(i, xr);
+ for (size_t j = 0; j < m_regs.size(); j++)
+ {
+ if (i != j && m_regs[j].location.IsSimpleReg(xr))
+ {
+ Crash();
+ }
+ }
+ m_regs[i].away = true;
+ m_regs[i].location = ::Gen::R(xr);
+ }
+ else
+ {
+ // reg location must be simplereg; memory locations
+ // and immediates are taken care of above.
+ m_xregs[RX(i)].dirty |= makeDirty;
+ }
+ if (m_xregs[RX(i)].locked)
+ {
+ PanicAlert("Seriously WTF, this reg should have been flushed");
+ }
+ }
+ void RegCache::StoreFromRegister(size_t i, FlushMode mode)
+ {
+ if (m_regs[i].away)
+ {
+ bool doStore;
+ if (m_regs[i].location.IsSimpleReg())
+ {
+ X64Reg xr = RX(i);
+ doStore = m_xregs[xr].dirty;
+ if (mode == FLUSH_ALL)
+ {
+ m_xregs[xr].free = true;
+ m_xregs[xr].ppcReg = INVALID_REG;
+ m_xregs[xr].dirty = false;
+ }
+ }
+ else
+ {
+ // must be immediate - do nothing
+ doStore = true;
+ }
+ OpArg newLoc = GetDefaultLocation(i);
+ if (doStore)
+ StoreRegister(i, newLoc);
+ if (mode == FLUSH_ALL)
+ {
+ m_regs[i].location = newLoc;
+ m_regs[i].away = false;
+ }
+ }
+ }
+ const OpArg& RegCache::R(size_t preg) const
+ {
+ return m_regs[preg].location;
+ }
+ X64Reg RegCache::RX(size_t preg) const
+ {
+ if (IsBound(preg))
+ return m_regs[preg].location.GetSimpleReg();
+ PanicAlert("Unbound register - %zu", preg);
+ return Gen::INVALID_REG;
+ }
void RegCache::UnlockAll()
{
- for (auto& reg : regs)
+ for (auto& reg : m_regs)
reg.locked = false;
}
void RegCache::UnlockAllX()
{
- for (auto& xreg : xregs)
+ for (auto& xreg : m_xregs)
xreg.locked = false;
}
- BitSet32 GPRRegCache::GetRegUtilization()
+ bool RegCache::IsFreeX(size_t xreg) const
{
- return jit->js.op->gprInReg;
+ return m_xregs[xreg].free && !m_xregs[xreg].locked;
}
- BitSet32 FPURegCache::GetRegUtilization()
+ bool RegCache::IsBound(size_t preg) const
{
- return jit->js.op->gprInReg;
- }
- BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
- {
- BitSet32 regsUsed;
- for (u32 i = 1; i < lookahead; i++)
- {
- BitSet32 regsIn = jit->js.op[i].regsIn;
- regsUsed |= regsIn;
- if (regsIn[preg])
- return regsUsed;
- }
- return regsUsed;
- }
- BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
- {
- BitSet32 regsUsed;
- for (u32 i = 1; i < lookahead; i++)
- {
- BitSet32 regsIn = jit->js.op[i].fregsIn;
- regsUsed |= regsIn;
- if (regsIn[preg])
- return regsUsed;
- }
- return regsUsed;
- }
- // Estimate roughly how bad it would be to de-allocate this register. Higher score
- // means more bad.
- float RegCache::ScoreRegister(X64Reg xr)
- {
- size_t preg = xregs[xr].ppcReg;
- float score = 0;
- // If it's not dirty, we don't need a store to write it back to the register file, so
- // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
- // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
- // to the number of extra stores it causes.
- if (xregs[xr].dirty)
- score += 2;
- // If the register isn't actually needed in a physical register for a later instruction,
- // writing it back to the register file isn't quite as bad.
- if (GetRegUtilization()[preg])
- {
- // Don't look too far ahead; we don't want to have quadratic compilation times for
- // enormous block sizes!
- // This actually improves register allocation a tiny bit; I'm not sure why.
- u32 lookahead = std::min(jit->js.instructionsLeft, 64);
- // Count how many other registers are going to be used before we need this one again.
- u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
- // Totally ad-hoc heuristic to bias based on how many other registers we'll need
- // before this one gets used again.
- score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
- }
- return score;
+ return m_regs[preg].away && m_regs[preg].location.IsSimpleReg();
}
X64Reg RegCache::GetFreeXReg()
@@ -144,7 +279,7 @@ X64Reg RegCache::GetFreeXReg()
for (size_t i = 0; i < aCount; i++)
{
X64Reg xr = aOrder[i];
- if (!xregs[xr].locked && xregs[xr].free)
+ if (!m_xregs[xr].locked && m_xregs[xr].free)
{
return xr;
}
@@ -158,8 +293,8 @@ X64Reg RegCache::GetFreeXReg()
for (size_t i = 0; i < aCount; i++)
{
X64Reg xreg = (X64Reg)aOrder[i];
- size_t preg = xregs[xreg].ppcReg;
- if (xregs[xreg].locked || regs[preg].locked)
+ size_t preg = m_xregs[xreg].ppcReg;
+ if (m_xregs[xreg].locked || m_regs[preg].locked)
continue;
float score = ScoreRegister(xreg);
if (score < min_score)
@@ -181,232 +316,45 @@ X64Reg RegCache::GetFreeXReg()
return INVALID_REG;
}
- void RegCache::FlushR(X64Reg reg)
- {
- if (reg >= xregs.size())
- PanicAlert("Flushing non existent reg");
- if (!xregs[reg].free)
- {
- StoreFromRegister(xregs[reg].ppcReg);
- }
- }
- int RegCache::SanityCheck() const
- {
- for (size_t i = 0; i < regs.size(); i++)
- {
- if (regs[i].away)
- {
- if (regs[i].location.IsSimpleReg())
- {
- Gen::X64Reg simple = regs[i].location.GetSimpleReg();
- if (xregs[simple].locked)
- return 1;
- if (xregs[simple].ppcReg != i)
- return 2;
- }
- else if (regs[i].location.IsImm())
- {
- return 3;
- }
- }
- }
- return 0;
- }
- void RegCache::DiscardRegContentsIfCached(size_t preg)
- {
- if (IsBound(preg))
- {
- X64Reg xr = regs[preg].location.GetSimpleReg();
- xregs[xr].free = true;
- xregs[xr].dirty = false;
- xregs[xr].ppcReg = INVALID_REG;
- regs[preg].away = false;
- regs[preg].location = GetDefaultLocation(preg);
- }
- }
- void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
- {
- // "dirty" can be false to avoid redundantly flushing an immediate when
- // processing speculative constants.
- DiscardRegContentsIfCached(preg);
- regs[preg].away |= dirty;
- regs[preg].location = Imm32(immValue);
- }
- const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
- {
- static const X64Reg allocationOrder[] = {
- // R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
- // this.
- #ifdef _WIN32
- RSI, RDI, R13, R14, R15, R8,
- R9, R10, R11, R12, RCX
- #else
- R12, R13, R14, R15, RSI, RDI,
- R8, R9, R10, R11, RCX
- #endif
- };
- *count = sizeof(allocationOrder) / sizeof(X64Reg);
- return allocationOrder;
- }
- const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
- {
- static const X64Reg allocationOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
- XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
- *count = sizeof(allocationOrder) / sizeof(X64Reg);
- return allocationOrder;
- }
- OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
- {
- return PPCSTATE(gpr[reg]);
- }
- OpArg FPURegCache::GetDefaultLocation(size_t reg) const
- {
- return PPCSTATE(ps[reg][0]);
- }
- void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
- {
- if (regs[preg].away)
- {
- if (regs[preg].location.IsImm())
- BindToRegister(preg, doLoad, makeDirty);
- else if (regs[preg].location.IsSimpleReg())
- xregs[RX(preg)].dirty |= makeDirty;
- }
- }
- void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
- {
- if (!regs[i].away || regs[i].location.IsImm())
- {
- X64Reg xr = GetFreeXReg();
- if (xregs[xr].dirty)
- PanicAlert("Xreg already dirty");
- if (xregs[xr].locked)
- PanicAlert("GetFreeXReg returned locked register");
- xregs[xr].free = false;
- xregs[xr].ppcReg = i;
- xregs[xr].dirty = makeDirty || regs[i].away;
- if (doLoad)
- LoadRegister(i, xr);
- for (size_t j = 0; j < regs.size(); j++)
- {
- if (i != j && regs[j].location.IsSimpleReg(xr))
- {
- Crash();
- }
- }
- regs[i].away = true;
- regs[i].location = ::Gen::R(xr);
- }
- else
- {
- // reg location must be simplereg; memory locations
- // and immediates are taken care of above.
- xregs[RX(i)].dirty |= makeDirty;
- }
- if (xregs[RX(i)].locked)
- {
- PanicAlert("Seriously WTF, this reg should have been flushed");
- }
- }
- void RegCache::StoreFromRegister(size_t i, FlushMode mode)
- {
- if (regs[i].away)
- {
- bool doStore;
- if (regs[i].location.IsSimpleReg())
- {
- X64Reg xr = RX(i);
- doStore = xregs[xr].dirty;
- if (mode == FLUSH_ALL)
- {
- xregs[xr].free = true;
- xregs[xr].ppcReg = INVALID_REG;
- xregs[xr].dirty = false;
- }
- }
- else
- {
- // must be immediate - do nothing
- doStore = true;
- }
- OpArg newLoc = GetDefaultLocation(i);
- if (doStore)
- StoreRegister(i, newLoc);
- if (mode == FLUSH_ALL)
- {
- regs[i].location = newLoc;
- regs[i].away = false;
- }
- }
- }
- void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
- {
- emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
- }
- void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc)
- {
- emit->MOV(32, newLoc, regs[preg].location);
- }
- void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc)
- {
- emit->MOVAPD(newLoc, regs[preg].location);
- }
- void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc)
- {
- emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
- }
- void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
- {
- for (size_t i = 0; i < xregs.size(); i++)
- {
- if (xregs[i].locked)
- PanicAlert("Someone forgot to unlock X64 reg %zu", i);
- }
- for (unsigned int i : regsToFlush)
- {
- if (regs[i].locked)
- {
- PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
- }
- if (regs[i].away)
- {
- if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
- {
- StoreFromRegister(i, mode);
- }
- else
- {
- _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
- }
- }
- }
- }
int RegCache::NumFreeRegisters()
{
int count = 0;
size_t aCount;
const X64Reg* aOrder = GetAllocationOrder(&aCount);
for (size_t i = 0; i < aCount; i++)
- if (!xregs[aOrder[i]].locked && xregs[aOrder[i]].free)
+ if (!m_xregs[aOrder[i]].locked && m_xregs[aOrder[i]].free)
count++;
return count;
}
+ // Estimate roughly how bad it would be to de-allocate this register. Higher score
+ // means more bad.
+ float RegCache::ScoreRegister(X64Reg xr)
+ {
+ size_t preg = m_xregs[xr].ppcReg;
+ float score = 0;
+ // If it's not dirty, we don't need a store to write it back to the register file, so
+ // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
+ // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
+ // to the number of extra stores it causes.
+ if (m_xregs[xr].dirty)
+ score += 2;
+ // If the register isn't actually needed in a physical register for a later instruction,
+ // writing it back to the register file isn't quite as bad.
+ if (GetRegUtilization()[preg])
+ {
+ // Don't look too far ahead; we don't want to have quadratic compilation times for
+ // enormous block sizes!
+ // This actually improves register allocation a tiny bit; I'm not sure why.
+ u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
+ // Count how many other registers are going to be used before we need this one again.
+ u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
+ // Totally ad-hoc heuristic to bias based on how many other registers we'll need
+ // before this one gets used again.
+ score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
+ }
+ return score;
+ }
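
To make the heuristic's range concrete, here is the same arithmetic in standalone form (my worked numbers, not from the commit): with a base-2 log, a register needed again immediately (regs_in_count = 0) scores 1 + 2*(5 - log2(1)) = 11, while one preceded by 31 other register uses scores 1 + 2*(5 - 5) = 1, and GetFreeXReg evicts the lowest scorer.

#include <cmath>
#include <cstdio>

// Standalone restatement of ScoreRegister's arithmetic (sketch only).
float Score(bool dirty, bool needed_later, unsigned regs_in_count)
{
  float score = 0;
  if (dirty)
    score += 2;  // eviction would cost an extra store
  if (needed_later)
    score += 1 + 2 * (5 - std::log2(1.0f + static_cast<float>(regs_in_count)));
  return score;
}

int main()
{
  std::printf("%.1f\n", Score(false, true, 0));   // 11.0: needed soon, keep it
  std::printf("%.1f\n", Score(true, false, 0));   // 2.0: dirty but idle, evictable
  std::printf("%.1f\n", Score(false, true, 31));  // 1.0: needed, but far away
}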

View File

@@ -10,6 +10,8 @@
#include "Common/x64Emitter.h"
#include "Core/PowerPC/PPCAnalyst.h"
+ class Jit64;
enum FlushMode
{
FLUSH_ALL,
@@ -31,55 +33,31 @@ struct X64CachedReg
bool locked;
};
- typedef int XReg;
- typedef int PReg;
- #define NUMXREGS 16
class RegCache
{
- protected:
- std::array<PPCCachedReg, 32> regs;
- std::array<X64CachedReg, NUMXREGS> xregs;
- virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0;
- virtual BitSet32 GetRegUtilization() = 0;
- virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0;
- Gen::XEmitter* emit;
- float ScoreRegister(Gen::X64Reg xreg);
public:
- RegCache();
- virtual ~RegCache() {}
+ static constexpr size_t NUM_XREGS = 16;
+ explicit RegCache(Jit64& jit);
+ virtual ~RegCache() = default;
+ virtual void StoreRegister(size_t preg, const Gen::OpArg& new_loc) = 0;
+ virtual void LoadRegister(size_t preg, Gen::X64Reg new_loc) = 0;
+ virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0;
void Start();
void DiscardRegContentsIfCached(size_t preg);
- void SetEmitter(Gen::XEmitter* emitter) { emit = emitter; }
- void FlushR(Gen::X64Reg reg);
- void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2)
- {
- FlushR(reg);
- FlushR(reg2);
- }
- void FlushLockX(Gen::X64Reg reg)
- {
- FlushR(reg);
- LockX(reg);
- }
- void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2)
- {
- FlushR(reg1);
- FlushR(reg2);
- LockX(reg1);
- LockX(reg2);
- }
+ void SetEmitter(Gen::XEmitter* emitter);
void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32));
void Flush(PPCAnalyst::CodeOp* op) { Flush(); }
+ void FlushR(Gen::X64Reg reg);
+ void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2);
+ void FlushLockX(Gen::X64Reg reg);
+ void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2);
int SanityCheck() const;
void KillImmediate(size_t preg, bool doLoad, bool makeDirty);
@@ -87,19 +65,9 @@ public:
// read only will not set dirty flag
void BindToRegister(size_t preg, bool doLoad = true, bool makeDirty = true);
void StoreFromRegister(size_t preg, FlushMode mode = FLUSH_ALL);
- virtual void StoreRegister(size_t preg, const Gen::OpArg& newLoc) = 0;
- virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0;
- const Gen::OpArg& R(size_t preg) const { return regs[preg].location; }
- Gen::X64Reg RX(size_t preg) const
- {
- if (IsBound(preg))
- return regs[preg].location.GetSimpleReg();
- PanicAlert("Unbound register - %zu", preg);
- return Gen::INVALID_REG;
- }
- virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0;
+ const Gen::OpArg& R(size_t preg) const;
+ Gen::X64Reg RX(size_t preg) const;
// Register locking.
@@ -107,7 +75,7 @@ public:
template <typename T>
void Lock(T p)
{
- regs[p].locked = true;
+ m_regs[p].locked = true;
}
template <typename T, typename... Args>
void Lock(T first, Args... args)
@@ -120,9 +88,9 @@ public:
template <typename T>
void LockX(T x)
{
- if (xregs[x].locked)
+ if (m_xregs[x].locked)
PanicAlert("RegCache: x %i already locked!", x);
- xregs[x].locked = true;
+ m_xregs[x].locked = true;
}
template <typename T, typename... Args>
void LockX(T first, Args... args)
@@ -134,9 +102,9 @@ public:
template <typename T>
void UnlockX(T x)
{
- if (!xregs[x].locked)
+ if (!m_xregs[x].locked)
PanicAlert("RegCache: x %i already unlocked!", x);
- xregs[x].locked = false;
+ m_xregs[x].locked = false;
}
template <typename T, typename... Args>
void UnlockX(T first, Args... args)
@@ -148,31 +116,22 @@ public:
void UnlockAll();
void UnlockAllX();
- bool IsFreeX(size_t xreg) const { return xregs[xreg].free && !xregs[xreg].locked; }
- bool IsBound(size_t preg) const { return regs[preg].away && regs[preg].location.IsSimpleReg(); }
+ bool IsFreeX(size_t xreg) const;
+ bool IsBound(size_t preg) const;
Gen::X64Reg GetFreeXReg();
int NumFreeRegisters();
- };
- class GPRRegCache final : public RegCache
- {
- public:
- void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
- void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
- Gen::OpArg GetDefaultLocation(size_t reg) const override;
- const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
- void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
- BitSet32 GetRegUtilization() override;
- BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
- };
+ protected:
+ virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0;
- class FPURegCache final : public RegCache
- {
- public:
- void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
- void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
- const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
- Gen::OpArg GetDefaultLocation(size_t reg) const override;
- BitSet32 GetRegUtilization() override;
- BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+ virtual BitSet32 GetRegUtilization() = 0;
+ virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0;
+ float ScoreRegister(Gen::X64Reg xreg);
+ Jit64& m_jit;
+ std::array<PPCCachedReg, 32> m_regs;
+ std::array<X64CachedReg, NUM_XREGS> m_xregs;
+ Gen::XEmitter* m_emitter = nullptr;
};
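
The reshaped header leaves RegCache owning all of the allocation and flush policy, while the per-cache details (allocation order, utilization bitsets, the actual load/store instructions) become pure-virtual hooks implemented by GPRRegCache and FPURegCache, in the style of the template-method pattern. A distilled sketch of that division (simplified signatures, not the full interface):

#include <cstddef>

class RegCacheSketch
{
public:
  virtual ~RegCacheSketch() = default;

  void FlushOne(std::size_t preg)  // shared policy lives in the base...
  {
    StoreRegister(preg);
  }

protected:
  // ...while each concrete cache supplies the instruction choice,
  // e.g. MOV for GPRs versus MOVAPD for FPRs in the classes above.
  virtual void StoreRegister(std::size_t preg) = 0;
};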