Merge pull request #4535 from lioncash/regcache
Jit64: Make register caches not rely on the global jit variable
This commit is contained in:
commit
b10a0de769
|
@ -196,6 +196,8 @@ if(_M_X86)
|
|||
PowerPC/Jit64IL/IR_X86.cpp
|
||||
PowerPC/Jit64IL/JitIL.cpp
|
||||
PowerPC/Jit64IL/JitIL_Tables.cpp
|
||||
PowerPC/Jit64/FPURegCache.cpp
|
||||
PowerPC/Jit64/GPRRegCache.cpp
|
||||
PowerPC/Jit64/Jit64_Tables.cpp
|
||||
PowerPC/Jit64/JitAsm.cpp
|
||||
PowerPC/Jit64/Jit_Branch.cpp
|
||||
|
|
|
@ -225,6 +225,8 @@
|
|||
<ClCompile Include="PowerPC\Jit64IL\IR_X86.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64IL\JitIL.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
|
||||
|
@ -427,6 +429,8 @@
|
|||
<ClInclude Include="PowerPC\Interpreter\Interpreter_Tables.h" />
|
||||
<ClInclude Include="PowerPC\Jit64IL\JitIL.h" />
|
||||
<ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\Jit.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\Jit64_Tables.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\JitAsm.h" />
|
||||
|
|
|
@ -666,6 +666,12 @@
|
|||
<ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp">
|
||||
<Filter>PowerPC\JitIL</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClCompile>
|
||||
|
@ -1262,6 +1268,12 @@
|
|||
<ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h">
|
||||
<Filter>PowerPC\JitIL</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PowerPC\Jit64\FPURegCache.h">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PowerPC\Jit64\JitRegCache.h">
|
||||
<Filter>PowerPC\Jit64</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/FPURegCache.h"
|
||||
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Builds the FPU register cache on top of RegCache, forwarding the owning
// Jit64 instance so the cache does not need a global JIT pointer.
FPURegCache::FPURegCache(Jit64& jit) : RegCache(jit)
{
}
|
||||
|
||||
void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc)
|
||||
{
|
||||
m_emitter->MOVAPD(new_loc, m_regs[preg].location.GetSimpleReg());
|
||||
}
|
||||
|
||||
void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc)
|
||||
{
|
||||
m_emitter->MOVAPD(new_loc, m_regs[preg].location);
|
||||
}
|
||||
|
||||
// Returns the table of host XMM registers this cache may hand out, in the
// order they should be tried. The number of entries is written to `*count`.
const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
{
  static const X64Reg xmm_order[] = {XMM6,  XMM7,  XMM8,  XMM9, XMM10, XMM11, XMM12,
                                     XMM13, XMM14, XMM15, XMM2, XMM3,  XMM4,  XMM5};

  // Element count derived from the array itself so the table can be edited freely.
  *count = sizeof(xmm_order) / sizeof(xmm_order[0]);
  return xmm_order;
}
|
||||
|
||||
// Returns the operand that addresses guest FP register `reg` in the PowerPC
// state block (PPCSTATE(ps[reg][0])), i.e. where the value lives when it is
// not held in a host register.
OpArg FPURegCache::GetDefaultLocation(size_t reg) const
{
  OpArg home_slot = PPCSTATE(ps[reg][0]);
  return home_slot;
}
|
||||
|
||||
// Returns the set of guest FP registers that the current op's analysis marks
// as wanted in a host register; ScoreRegister() consults it when picking a
// register to evict.
//
// The previous implementation returned `gprInReg`, which is the integer
// register set and is indexed by GPR number, so FP eviction scores were based
// on unrelated data.
// NOTE(review): assumes the op analysis exposes the FP counterpart as
// `fprInXmm` next to `gprInReg` - confirm against the CodeOp definition.
BitSet32 FPURegCache::GetRegUtilization()
{
  return m_jit.js.op->fprInXmm;
}
|
||||
|
||||
// Collects the FP input registers (fregsIn) of the ops that follow the
// current one, looking at offsets 1 .. lookahead-1.
// Scanning stops at, and includes, the first op that reads `preg`, so the
// result is the set of registers needed before `preg` is used again (or all
// registers seen in the window if it is not).
BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
{
  BitSet32 seen;

  u32 offset = 1;
  while (offset < lookahead)
  {
    BitSet32 inputs = m_jit.js.op[offset].fregsIn;
    seen |= inputs;

    if (inputs[preg])
      break;

    ++offset;
  }

  return seen;
}
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
class FPURegCache final : public RegCache
|
||||
{
|
||||
public:
|
||||
explicit FPURegCache(Jit64& jit);
|
||||
|
||||
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
||||
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
|
||||
Gen::OpArg GetDefaultLocation(size_t reg) const override;
|
||||
BitSet32 GetRegUtilization() override;
|
||||
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
|
||||
};
|
|
@ -0,0 +1,76 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/GPRRegCache.h"
|
||||
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Builds the GPR register cache on top of RegCache, forwarding the owning
// Jit64 instance so the cache does not need a global JIT pointer.
GPRRegCache::GPRRegCache(Jit64& jit) : RegCache(jit)
{
}
|
||||
|
||||
void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc)
|
||||
{
|
||||
m_emitter->MOV(32, new_loc, m_regs[preg].location);
|
||||
}
|
||||
|
||||
void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc)
|
||||
{
|
||||
m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].location);
|
||||
}
|
||||
|
||||
// Returns the operand that addresses guest GPR `reg` in the PowerPC state
// block (PPCSTATE(gpr[reg])), i.e. where the value lives when it is not held
// in a host register or as an immediate.
OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
{
  OpArg home_slot = PPCSTATE(gpr[reg]);
  return home_slot;
}
|
||||
|
||||
// Returns the table of host general-purpose registers this cache may hand
// out, in the order they should be tried. The number of entries is written to
// `*count`. The order differs between Windows and other platforms.
const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
{
  static const X64Reg gpr_order[] = {
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
// this.
#ifdef _WIN32
      RSI, RDI, R13, R14, R15, R8, R9, R10, R11, R12, RCX
#else
      R12, R13, R14, R15, RSI, RDI, R8, R9, R10, R11, RCX
#endif
  };

  // Element count derived from the array itself so both variants stay in sync.
  *count = sizeof(gpr_order) / sizeof(gpr_order[0]);
  return gpr_order;
}
|
||||
|
||||
void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty)
|
||||
{
|
||||
// "dirty" can be false to avoid redundantly flushing an immediate when
|
||||
// processing speculative constants.
|
||||
DiscardRegContentsIfCached(preg);
|
||||
m_regs[preg].away |= dirty;
|
||||
m_regs[preg].location = Imm32(imm_value);
|
||||
}
|
||||
|
||||
// Returns the `gprInReg` set of the op currently being compiled;
// ScoreRegister() consults it when picking a register to evict.
BitSet32 GPRRegCache::GetRegUtilization()
{
  const auto& current_op = *m_jit.js.op;
  return current_op.gprInReg;
}
|
||||
|
||||
// Collects the integer input registers (regsIn) of the ops that follow the
// current one, looking at offsets 1 .. lookahead-1.
// Scanning stops at, and includes, the first op that reads `preg`, so the
// result is the set of registers needed before `preg` is used again (or all
// registers seen in the window if it is not).
BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
{
  BitSet32 seen;

  u32 offset = 1;
  while (offset < lookahead)
  {
    BitSet32 inputs = m_jit.js.op[offset].regsIn;
    seen |= inputs;

    if (inputs[preg])
      break;

    ++offset;
  }

  return seen;
}
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
class GPRRegCache final : public RegCache
|
||||
{
|
||||
public:
|
||||
explicit GPRRegCache(Jit64& jit);
|
||||
|
||||
void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override;
|
||||
void LoadRegister(size_t preg, Gen::X64Reg new_loc) override;
|
||||
Gen::OpArg GetDefaultLocation(size_t reg) const override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
|
||||
void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true);
|
||||
BitSet32 GetRegUtilization() override;
|
||||
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
|
||||
};
|
|
@ -950,7 +950,7 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const
|
|||
BitSet32 Jit64::CallerSavedRegistersInUse() const
|
||||
{
|
||||
BitSet32 result;
|
||||
for (int i = 0; i < NUMXREGS; i++)
|
||||
for (size_t i = 0; i < RegCache::NUM_XREGS; i++)
|
||||
{
|
||||
if (!gpr.IsFreeX(i))
|
||||
result[i] = true;
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/FPURegCache.h"
|
||||
#include "Core/PowerPC/Jit64/GPRRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
||||
|
@ -33,8 +35,8 @@ private:
|
|||
void AllocStack();
|
||||
void FreeStack();
|
||||
|
||||
GPRRegCache gpr;
|
||||
FPURegCache fpr;
|
||||
GPRRegCache gpr{*this};
|
||||
FPURegCache fpr{*this};
|
||||
|
||||
// The default code buffer. We keep it around to not have to alloc/dealloc a
|
||||
// large chunk of memory for each recompiled block.
|
||||
|
|
|
@ -14,30 +14,29 @@
|
|||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace PowerPC;
|
||||
|
||||
RegCache::RegCache() : emit(nullptr)
|
||||
RegCache::RegCache(Jit64& jit) : m_jit{jit}
|
||||
{
|
||||
}
|
||||
|
||||
void RegCache::Start()
|
||||
{
|
||||
for (auto& xreg : xregs)
|
||||
for (auto& xreg : m_xregs)
|
||||
{
|
||||
xreg.free = true;
|
||||
xreg.dirty = false;
|
||||
xreg.locked = false;
|
||||
xreg.ppcReg = INVALID_REG;
|
||||
}
|
||||
for (size_t i = 0; i < regs.size(); i++)
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
regs[i].location = GetDefaultLocation(i);
|
||||
regs[i].away = false;
|
||||
regs[i].locked = false;
|
||||
m_regs[i].location = GetDefaultLocation(i);
|
||||
m_regs[i].away = false;
|
||||
m_regs[i].locked = false;
|
||||
}
|
||||
|
||||
// todo: sort to find the most popular regs
|
||||
|
@ -57,84 +56,220 @@ void RegCache::Start()
|
|||
// But only preload IF written OR reads >= 3
|
||||
}
|
||||
|
||||
void RegCache::DiscardRegContentsIfCached(size_t preg)
|
||||
{
|
||||
if (IsBound(preg))
|
||||
{
|
||||
X64Reg xr = m_regs[preg].location.GetSimpleReg();
|
||||
m_xregs[xr].free = true;
|
||||
m_xregs[xr].dirty = false;
|
||||
m_xregs[xr].ppcReg = INVALID_REG;
|
||||
m_regs[preg].away = false;
|
||||
m_regs[preg].location = GetDefaultLocation(preg);
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the code emitter that the derived caches use to emit their load/store
// instructions. The pointer is stored as-is.
void RegCache::SetEmitter(XEmitter* emitter)
{
  this->m_emitter = emitter;
}
|
||||
|
||||
// Writes cached guest registers back via StoreFromRegister().
//
//   mode         - forwarded to StoreFromRegister(); FLUSH_ALL additionally
//                  releases the binding there.
//   regsToFlush  - set of guest register indices to process.
//
// Any host or guest register that is still locked is reported with a
// PanicAlert, but flushing continues regardless.
void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
{
  // Locked host registers at flush time indicate a missing unlock somewhere.
  for (size_t xreg = 0; xreg < m_xregs.size(); ++xreg)
  {
    if (!m_xregs[xreg].locked)
      continue;

    PanicAlert("Someone forgot to unlock X64 reg %zu", xreg);
  }

  for (unsigned int i : regsToFlush)
  {
    if (m_regs[i].locked)
    {
      PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
    }

    // Registers still at their default location have nothing to write back.
    if (!m_regs[i].away)
      continue;

    const bool can_store = m_regs[i].location.IsSimpleReg() || m_regs[i].location.IsImm();
    if (can_store)
    {
      StoreFromRegister(i, mode);
    }
    else
    {
      _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
    }
  }
}
|
||||
|
||||
// Writes back and releases whatever guest register currently occupies host
// register `reg`, if any.
//
// An out-of-range `reg` is reported with a PanicAlert and then ignored.
// Previously execution continued and indexed m_xregs past its end.
void RegCache::FlushR(X64Reg reg)
{
  if (reg >= m_xregs.size())
  {
    PanicAlert("Flushing non existent reg");
    return;
  }

  if (!m_xregs[reg].free)
  {
    StoreFromRegister(m_xregs[reg].ppcReg);
  }
}
|
||||
|
||||
// Convenience overload: flushes `reg` first, then `reg2`.
void RegCache::FlushR(X64Reg reg, X64Reg reg2)
{
  const X64Reg in_order[] = {reg, reg2};
  for (const X64Reg target : in_order)
    FlushR(target);
}
|
||||
|
||||
// Flushes host register `reg` and then locks it.
void RegCache::FlushLockX(X64Reg reg)
{
  // Flush before locking: the order of these two calls is kept as-is.
  this->FlushR(reg);
  this->LockX(reg);
}
|
||||
|
||||
// Two-register variant: both host registers are flushed (reg1, then reg2)
// before either one is locked (reg1, then reg2).
void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2)
{
  // The two-argument FlushR flushes its arguments in the order given.
  FlushR(reg1, reg2);

  LockX(reg1);
  LockX(reg2);
}
|
||||
|
||||
int RegCache::SanityCheck() const
|
||||
{
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
if (m_regs[i].away)
|
||||
{
|
||||
if (m_regs[i].location.IsSimpleReg())
|
||||
{
|
||||
Gen::X64Reg simple = m_regs[i].location.GetSimpleReg();
|
||||
if (m_xregs[simple].locked)
|
||||
return 1;
|
||||
if (m_xregs[simple].ppcReg != i)
|
||||
return 2;
|
||||
}
|
||||
else if (m_regs[i].location.IsImm())
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (m_regs[preg].away)
|
||||
{
|
||||
if (m_regs[preg].location.IsImm())
|
||||
BindToRegister(preg, doLoad, makeDirty);
|
||||
else if (m_regs[preg].location.IsSimpleReg())
|
||||
m_xregs[RX(preg)].dirty |= makeDirty;
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (!m_regs[i].away || m_regs[i].location.IsImm())
|
||||
{
|
||||
X64Reg xr = GetFreeXReg();
|
||||
if (m_xregs[xr].dirty)
|
||||
PanicAlert("Xreg already dirty");
|
||||
if (m_xregs[xr].locked)
|
||||
PanicAlert("GetFreeXReg returned locked register");
|
||||
m_xregs[xr].free = false;
|
||||
m_xregs[xr].ppcReg = i;
|
||||
m_xregs[xr].dirty = makeDirty || m_regs[i].away;
|
||||
if (doLoad)
|
||||
LoadRegister(i, xr);
|
||||
for (size_t j = 0; j < m_regs.size(); j++)
|
||||
{
|
||||
if (i != j && m_regs[j].location.IsSimpleReg(xr))
|
||||
{
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
m_regs[i].away = true;
|
||||
m_regs[i].location = ::Gen::R(xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// reg location must be simplereg; memory locations
|
||||
// and immediates are taken care of above.
|
||||
m_xregs[RX(i)].dirty |= makeDirty;
|
||||
}
|
||||
|
||||
if (m_xregs[RX(i)].locked)
|
||||
{
|
||||
PanicAlert("Seriously WTF, this reg should have been flushed");
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::StoreFromRegister(size_t i, FlushMode mode)
|
||||
{
|
||||
if (m_regs[i].away)
|
||||
{
|
||||
bool doStore;
|
||||
if (m_regs[i].location.IsSimpleReg())
|
||||
{
|
||||
X64Reg xr = RX(i);
|
||||
doStore = m_xregs[xr].dirty;
|
||||
if (mode == FLUSH_ALL)
|
||||
{
|
||||
m_xregs[xr].free = true;
|
||||
m_xregs[xr].ppcReg = INVALID_REG;
|
||||
m_xregs[xr].dirty = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// must be immediate - do nothing
|
||||
doStore = true;
|
||||
}
|
||||
OpArg newLoc = GetDefaultLocation(i);
|
||||
if (doStore)
|
||||
StoreRegister(i, newLoc);
|
||||
if (mode == FLUSH_ALL)
|
||||
{
|
||||
m_regs[i].location = newLoc;
|
||||
m_regs[i].away = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the operand currently recorded as the location of guest register
// `preg`. The reference points into the cache's own table.
const OpArg& RegCache::R(size_t preg) const
{
  const PPCCachedReg& entry = m_regs[preg];
  return entry.location;
}
|
||||
|
||||
// Returns the host register that guest register `preg` is bound to.
// If `preg` is not bound, a PanicAlert is raised and Gen::INVALID_REG is
// returned.
X64Reg RegCache::RX(size_t preg) const
{
  if (!IsBound(preg))
  {
    PanicAlert("Unbound register - %zu", preg);
    return Gen::INVALID_REG;
  }

  return m_regs[preg].location.GetSimpleReg();
}
|
||||
|
||||
void RegCache::UnlockAll()
|
||||
{
|
||||
for (auto& reg : regs)
|
||||
for (auto& reg : m_regs)
|
||||
reg.locked = false;
|
||||
}
|
||||
|
||||
void RegCache::UnlockAllX()
|
||||
{
|
||||
for (auto& xreg : xregs)
|
||||
for (auto& xreg : m_xregs)
|
||||
xreg.locked = false;
|
||||
}
|
||||
|
||||
BitSet32 GPRRegCache::GetRegUtilization()
|
||||
bool RegCache::IsFreeX(size_t xreg) const
|
||||
{
|
||||
return jit->js.op->gprInReg;
|
||||
return m_xregs[xreg].free && !m_xregs[xreg].locked;
|
||||
}
|
||||
|
||||
BitSet32 FPURegCache::GetRegUtilization()
|
||||
bool RegCache::IsBound(size_t preg) const
|
||||
{
|
||||
return jit->js.op->gprInReg;
|
||||
}
|
||||
|
||||
BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
|
||||
{
|
||||
BitSet32 regsUsed;
|
||||
for (u32 i = 1; i < lookahead; i++)
|
||||
{
|
||||
BitSet32 regsIn = jit->js.op[i].regsIn;
|
||||
regsUsed |= regsIn;
|
||||
if (regsIn[preg])
|
||||
return regsUsed;
|
||||
}
|
||||
return regsUsed;
|
||||
}
|
||||
|
||||
BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
|
||||
{
|
||||
BitSet32 regsUsed;
|
||||
for (u32 i = 1; i < lookahead; i++)
|
||||
{
|
||||
BitSet32 regsIn = jit->js.op[i].fregsIn;
|
||||
regsUsed |= regsIn;
|
||||
if (regsIn[preg])
|
||||
return regsUsed;
|
||||
}
|
||||
return regsUsed;
|
||||
}
|
||||
|
||||
// Estimate roughly how bad it would be to de-allocate this register. Higher score
|
||||
// means more bad.
|
||||
float RegCache::ScoreRegister(X64Reg xr)
|
||||
{
|
||||
size_t preg = xregs[xr].ppcReg;
|
||||
float score = 0;
|
||||
|
||||
// If it's not dirty, we don't need a store to write it back to the register file, so
|
||||
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
|
||||
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
|
||||
// to the number of extra stores it causes.
|
||||
if (xregs[xr].dirty)
|
||||
score += 2;
|
||||
|
||||
// If the register isn't actually needed in a physical register for a later instruction,
|
||||
// writing it back to the register file isn't quite as bad.
|
||||
if (GetRegUtilization()[preg])
|
||||
{
|
||||
// Don't look too far ahead; we don't want to have quadratic compilation times for
|
||||
// enormous block sizes!
|
||||
// This actually improves register allocation a tiny bit; I'm not sure why.
|
||||
u32 lookahead = std::min(jit->js.instructionsLeft, 64);
|
||||
// Count how many other registers are going to be used before we need this one again.
|
||||
u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
|
||||
// Totally ad-hoc heuristic to bias based on how many other registers we'll need
|
||||
// before this one gets used again.
|
||||
score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
|
||||
}
|
||||
|
||||
return score;
|
||||
return m_regs[preg].away && m_regs[preg].location.IsSimpleReg();
|
||||
}
|
||||
|
||||
X64Reg RegCache::GetFreeXReg()
|
||||
|
@ -144,7 +279,7 @@ X64Reg RegCache::GetFreeXReg()
|
|||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xr = aOrder[i];
|
||||
if (!xregs[xr].locked && xregs[xr].free)
|
||||
if (!m_xregs[xr].locked && m_xregs[xr].free)
|
||||
{
|
||||
return xr;
|
||||
}
|
||||
|
@ -158,8 +293,8 @@ X64Reg RegCache::GetFreeXReg()
|
|||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xreg = (X64Reg)aOrder[i];
|
||||
size_t preg = xregs[xreg].ppcReg;
|
||||
if (xregs[xreg].locked || regs[preg].locked)
|
||||
size_t preg = m_xregs[xreg].ppcReg;
|
||||
if (m_xregs[xreg].locked || m_regs[preg].locked)
|
||||
continue;
|
||||
float score = ScoreRegister(xreg);
|
||||
if (score < min_score)
|
||||
|
@ -181,232 +316,45 @@ X64Reg RegCache::GetFreeXReg()
|
|||
return INVALID_REG;
|
||||
}
|
||||
|
||||
void RegCache::FlushR(X64Reg reg)
|
||||
{
|
||||
if (reg >= xregs.size())
|
||||
PanicAlert("Flushing non existent reg");
|
||||
if (!xregs[reg].free)
|
||||
{
|
||||
StoreFromRegister(xregs[reg].ppcReg);
|
||||
}
|
||||
}
|
||||
|
||||
int RegCache::SanityCheck() const
|
||||
{
|
||||
for (size_t i = 0; i < regs.size(); i++)
|
||||
{
|
||||
if (regs[i].away)
|
||||
{
|
||||
if (regs[i].location.IsSimpleReg())
|
||||
{
|
||||
Gen::X64Reg simple = regs[i].location.GetSimpleReg();
|
||||
if (xregs[simple].locked)
|
||||
return 1;
|
||||
if (xregs[simple].ppcReg != i)
|
||||
return 2;
|
||||
}
|
||||
else if (regs[i].location.IsImm())
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RegCache::DiscardRegContentsIfCached(size_t preg)
|
||||
{
|
||||
if (IsBound(preg))
|
||||
{
|
||||
X64Reg xr = regs[preg].location.GetSimpleReg();
|
||||
xregs[xr].free = true;
|
||||
xregs[xr].dirty = false;
|
||||
xregs[xr].ppcReg = INVALID_REG;
|
||||
regs[preg].away = false;
|
||||
regs[preg].location = GetDefaultLocation(preg);
|
||||
}
|
||||
}
|
||||
|
||||
void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
|
||||
{
|
||||
// "dirty" can be false to avoid redundantly flushing an immediate when
|
||||
// processing speculative constants.
|
||||
DiscardRegContentsIfCached(preg);
|
||||
regs[preg].away |= dirty;
|
||||
regs[preg].location = Imm32(immValue);
|
||||
}
|
||||
|
||||
const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
|
||||
{
|
||||
static const X64Reg allocationOrder[] = {
|
||||
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
|
||||
// this.
|
||||
#ifdef _WIN32
|
||||
RSI, RDI, R13, R14, R15, R8,
|
||||
R9, R10, R11, R12, RCX
|
||||
#else
|
||||
R12, R13, R14, R15, RSI, RDI,
|
||||
R8, R9, R10, R11, RCX
|
||||
#endif
|
||||
};
|
||||
*count = sizeof(allocationOrder) / sizeof(X64Reg);
|
||||
return allocationOrder;
|
||||
}
|
||||
|
||||
const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
|
||||
{
|
||||
static const X64Reg allocationOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
|
||||
XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
|
||||
*count = sizeof(allocationOrder) / sizeof(X64Reg);
|
||||
return allocationOrder;
|
||||
}
|
||||
|
||||
OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
|
||||
{
|
||||
return PPCSTATE(gpr[reg]);
|
||||
}
|
||||
|
||||
OpArg FPURegCache::GetDefaultLocation(size_t reg) const
|
||||
{
|
||||
return PPCSTATE(ps[reg][0]);
|
||||
}
|
||||
|
||||
void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (regs[preg].away)
|
||||
{
|
||||
if (regs[preg].location.IsImm())
|
||||
BindToRegister(preg, doLoad, makeDirty);
|
||||
else if (regs[preg].location.IsSimpleReg())
|
||||
xregs[RX(preg)].dirty |= makeDirty;
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (!regs[i].away || regs[i].location.IsImm())
|
||||
{
|
||||
X64Reg xr = GetFreeXReg();
|
||||
if (xregs[xr].dirty)
|
||||
PanicAlert("Xreg already dirty");
|
||||
if (xregs[xr].locked)
|
||||
PanicAlert("GetFreeXReg returned locked register");
|
||||
xregs[xr].free = false;
|
||||
xregs[xr].ppcReg = i;
|
||||
xregs[xr].dirty = makeDirty || regs[i].away;
|
||||
if (doLoad)
|
||||
LoadRegister(i, xr);
|
||||
for (size_t j = 0; j < regs.size(); j++)
|
||||
{
|
||||
if (i != j && regs[j].location.IsSimpleReg(xr))
|
||||
{
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
regs[i].away = true;
|
||||
regs[i].location = ::Gen::R(xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// reg location must be simplereg; memory locations
|
||||
// and immediates are taken care of above.
|
||||
xregs[RX(i)].dirty |= makeDirty;
|
||||
}
|
||||
|
||||
if (xregs[RX(i)].locked)
|
||||
{
|
||||
PanicAlert("Seriously WTF, this reg should have been flushed");
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::StoreFromRegister(size_t i, FlushMode mode)
|
||||
{
|
||||
if (regs[i].away)
|
||||
{
|
||||
bool doStore;
|
||||
if (regs[i].location.IsSimpleReg())
|
||||
{
|
||||
X64Reg xr = RX(i);
|
||||
doStore = xregs[xr].dirty;
|
||||
if (mode == FLUSH_ALL)
|
||||
{
|
||||
xregs[xr].free = true;
|
||||
xregs[xr].ppcReg = INVALID_REG;
|
||||
xregs[xr].dirty = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// must be immediate - do nothing
|
||||
doStore = true;
|
||||
}
|
||||
OpArg newLoc = GetDefaultLocation(i);
|
||||
if (doStore)
|
||||
StoreRegister(i, newLoc);
|
||||
if (mode == FLUSH_ALL)
|
||||
{
|
||||
regs[i].location = newLoc;
|
||||
regs[i].away = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
|
||||
{
|
||||
emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
|
||||
}
|
||||
|
||||
void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc)
|
||||
{
|
||||
emit->MOV(32, newLoc, regs[preg].location);
|
||||
}
|
||||
|
||||
void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc)
|
||||
{
|
||||
emit->MOVAPD(newLoc, regs[preg].location);
|
||||
}
|
||||
|
||||
void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc)
|
||||
{
|
||||
emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
|
||||
}
|
||||
|
||||
void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
|
||||
{
|
||||
for (size_t i = 0; i < xregs.size(); i++)
|
||||
{
|
||||
if (xregs[i].locked)
|
||||
PanicAlert("Someone forgot to unlock X64 reg %zu", i);
|
||||
}
|
||||
|
||||
for (unsigned int i : regsToFlush)
|
||||
{
|
||||
if (regs[i].locked)
|
||||
{
|
||||
PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
|
||||
}
|
||||
|
||||
if (regs[i].away)
|
||||
{
|
||||
if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
|
||||
{
|
||||
StoreFromRegister(i, mode);
|
||||
}
|
||||
else
|
||||
{
|
||||
_assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int RegCache::NumFreeRegisters()
|
||||
{
|
||||
int count = 0;
|
||||
size_t aCount;
|
||||
const X64Reg* aOrder = GetAllocationOrder(&aCount);
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
if (!xregs[aOrder[i]].locked && xregs[aOrder[i]].free)
|
||||
if (!m_xregs[aOrder[i]].locked && m_xregs[aOrder[i]].free)
|
||||
count++;
|
||||
return count;
|
||||
}
|
||||
|
||||
// Estimate roughly how bad it would be to de-allocate this register. Higher score
|
||||
// means more bad.
|
||||
// Estimates roughly how bad it would be to de-allocate host register `xr`.
// A higher score means evicting it is worse.
float RegCache::ScoreRegister(X64Reg xr)
{
  const size_t preg = m_xregs[xr].ppcReg;

  // A clean register needs no store to write it back to the register file, so
  // dirty registers get a penalty. Per the original authors' testing a bias of
  // 2 is roughly right: 3 causes too many extra clobbers, while 1 saves very
  // few clobbers relative to the number of extra stores it causes.
  float score = m_xregs[xr].dirty ? 2.0f : 0.0f;

  // If the register isn't actually needed in a physical register for a later
  // instruction, writing it back to the register file isn't quite as bad, so
  // no further penalty is added.
  if (!GetRegUtilization()[preg])
    return score;

  // Don't look too far ahead; we don't want quadratic compilation times for
  // enormous block sizes. (The original note adds that this cap actually
  // improves register allocation a tiny bit, for reasons unknown.)
  const u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);

  // How many other registers will be used before this one is needed again.
  const u32 regs_in_count = CountRegsIn(preg, lookahead).Count();

  // Totally ad-hoc heuristic: the fewer other registers are needed before the
  // next use of this one, the larger the penalty.
  score += 1.0f + 2.0f * (5.0f - log2f(1.0f + static_cast<float>(regs_in_count)));

  return score;
}
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
enum FlushMode
|
||||
{
|
||||
FLUSH_ALL,
|
||||
|
@ -31,55 +33,31 @@ struct X64CachedReg
|
|||
bool locked;
|
||||
};
|
||||
|
||||
typedef int XReg;
|
||||
typedef int PReg;
|
||||
|
||||
#define NUMXREGS 16
|
||||
|
||||
class RegCache
|
||||
{
|
||||
protected:
|
||||
std::array<PPCCachedReg, 32> regs;
|
||||
std::array<X64CachedReg, NUMXREGS> xregs;
|
||||
|
||||
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0;
|
||||
|
||||
virtual BitSet32 GetRegUtilization() = 0;
|
||||
virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0;
|
||||
|
||||
Gen::XEmitter* emit;
|
||||
|
||||
float ScoreRegister(Gen::X64Reg xreg);
|
||||
|
||||
public:
|
||||
RegCache();
|
||||
virtual ~RegCache() {}
|
||||
static constexpr size_t NUM_XREGS = 16;
|
||||
|
||||
explicit RegCache(Jit64& jit);
|
||||
virtual ~RegCache() = default;
|
||||
|
||||
virtual void StoreRegister(size_t preg, const Gen::OpArg& new_loc) = 0;
|
||||
virtual void LoadRegister(size_t preg, Gen::X64Reg new_loc) = 0;
|
||||
virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0;
|
||||
|
||||
void Start();
|
||||
|
||||
void DiscardRegContentsIfCached(size_t preg);
|
||||
void SetEmitter(Gen::XEmitter* emitter) { emit = emitter; }
|
||||
void FlushR(Gen::X64Reg reg);
|
||||
void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2)
|
||||
{
|
||||
FlushR(reg);
|
||||
FlushR(reg2);
|
||||
}
|
||||
|
||||
void FlushLockX(Gen::X64Reg reg)
|
||||
{
|
||||
FlushR(reg);
|
||||
LockX(reg);
|
||||
}
|
||||
void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2)
|
||||
{
|
||||
FlushR(reg1);
|
||||
FlushR(reg2);
|
||||
LockX(reg1);
|
||||
LockX(reg2);
|
||||
}
|
||||
void SetEmitter(Gen::XEmitter* emitter);
|
||||
|
||||
void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32));
|
||||
void Flush(PPCAnalyst::CodeOp* op) { Flush(); }
|
||||
|
||||
void FlushR(Gen::X64Reg reg);
|
||||
void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2);
|
||||
|
||||
void FlushLockX(Gen::X64Reg reg);
|
||||
void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2);
|
||||
|
||||
int SanityCheck() const;
|
||||
void KillImmediate(size_t preg, bool doLoad, bool makeDirty);
|
||||
|
||||
|
@ -87,19 +65,9 @@ public:
|
|||
// read only will not set dirty flag
|
||||
void BindToRegister(size_t preg, bool doLoad = true, bool makeDirty = true);
|
||||
void StoreFromRegister(size_t preg, FlushMode mode = FLUSH_ALL);
|
||||
virtual void StoreRegister(size_t preg, const Gen::OpArg& newLoc) = 0;
|
||||
virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0;
|
||||
|
||||
const Gen::OpArg& R(size_t preg) const { return regs[preg].location; }
|
||||
Gen::X64Reg RX(size_t preg) const
|
||||
{
|
||||
if (IsBound(preg))
|
||||
return regs[preg].location.GetSimpleReg();
|
||||
|
||||
PanicAlert("Unbound register - %zu", preg);
|
||||
return Gen::INVALID_REG;
|
||||
}
|
||||
virtual Gen::OpArg GetDefaultLocation(size_t reg) const = 0;
|
||||
const Gen::OpArg& R(size_t preg) const;
|
||||
Gen::X64Reg RX(size_t preg) const;
|
||||
|
||||
// Register locking.
|
||||
|
||||
|
@ -107,7 +75,7 @@ public:
|
|||
template <typename T>
|
||||
void Lock(T p)
|
||||
{
|
||||
regs[p].locked = true;
|
||||
m_regs[p].locked = true;
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void Lock(T first, Args... args)
|
||||
|
@ -120,9 +88,9 @@ public:
|
|||
template <typename T>
|
||||
void LockX(T x)
|
||||
{
|
||||
if (xregs[x].locked)
|
||||
if (m_xregs[x].locked)
|
||||
PanicAlert("RegCache: x %i already locked!", x);
|
||||
xregs[x].locked = true;
|
||||
m_xregs[x].locked = true;
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void LockX(T first, Args... args)
|
||||
|
@ -134,9 +102,9 @@ public:
|
|||
template <typename T>
|
||||
void UnlockX(T x)
|
||||
{
|
||||
if (!xregs[x].locked)
|
||||
if (!m_xregs[x].locked)
|
||||
PanicAlert("RegCache: x %i already unlocked!", x);
|
||||
xregs[x].locked = false;
|
||||
m_xregs[x].locked = false;
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void UnlockX(T first, Args... args)
|
||||
|
@ -148,31 +116,22 @@ public:
|
|||
void UnlockAll();
|
||||
void UnlockAllX();
|
||||
|
||||
bool IsFreeX(size_t xreg) const { return xregs[xreg].free && !xregs[xreg].locked; }
|
||||
bool IsBound(size_t preg) const { return regs[preg].away && regs[preg].location.IsSimpleReg(); }
|
||||
bool IsFreeX(size_t xreg) const;
|
||||
bool IsBound(size_t preg) const;
|
||||
|
||||
Gen::X64Reg GetFreeXReg();
|
||||
int NumFreeRegisters();
|
||||
};
|
||||
|
||||
class GPRRegCache final : public RegCache
|
||||
{
|
||||
public:
|
||||
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
||||
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
|
||||
Gen::OpArg GetDefaultLocation(size_t reg) const override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
|
||||
void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
|
||||
BitSet32 GetRegUtilization() override;
|
||||
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
|
||||
};
|
||||
protected:
|
||||
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) = 0;
|
||||
|
||||
class FPURegCache final : public RegCache
|
||||
{
|
||||
public:
|
||||
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
||||
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
|
||||
Gen::OpArg GetDefaultLocation(size_t reg) const override;
|
||||
BitSet32 GetRegUtilization() override;
|
||||
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
|
||||
virtual BitSet32 GetRegUtilization() = 0;
|
||||
virtual BitSet32 CountRegsIn(size_t preg, u32 lookahead) = 0;
|
||||
|
||||
float ScoreRegister(Gen::X64Reg xreg);
|
||||
|
||||
Jit64& m_jit;
|
||||
std::array<PPCCachedReg, 32> m_regs;
|
||||
std::array<X64CachedReg, NUM_XREGS> m_xregs;
|
||||
Gen::XEmitter* m_emitter = nullptr;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue