Fix all the current issues with the AArch64 register cache.

Fixes all the current issues I've been experiencing.
Scaled back the register cache idea for now so I can actually work on some real instructions.

Tested this work with unit tests so I know it works.
Unit tests are pretty great things.
This commit is contained in:
Ryan Houdek 2014-09-12 05:55:06 -05:00
parent 64b21a4812
commit e883a6fb2e
2 changed files with 218 additions and 84 deletions

View File

@ -13,8 +13,12 @@ void Arm64RegCache::Init(ARM64XEmitter *emitter)
GetAllocationOrder(); GetAllocationOrder();
} }
ARM64Reg Arm64RegCache::GetReg(void) ARM64Reg Arm64RegCache::GetReg()
{ {
// If we have no registers left, dump the most stale register first
if (!GetUnlockedRegisterCount())
FlushMostStaleRegister();
for (auto& it : m_host_registers) for (auto& it : m_host_registers)
{ {
if (!it.IsLocked()) if (!it.IsLocked())
@ -29,6 +33,15 @@ ARM64Reg Arm64RegCache::GetReg(void)
return INVALID_REG; return INVALID_REG;
} }
// Returns how many entries of m_host_registers are currently not locked.
u32 Arm64RegCache::GetUnlockedRegisterCount()
{
	u32 free_count = 0;
	for (auto& host_reg : m_host_registers)
	{
		// Locked registers are in use and do not count as available
		if (host_reg.IsLocked())
			continue;
		free_count++;
	}
	return free_count;
}
void Arm64RegCache::LockRegister(ARM64Reg host_reg) void Arm64RegCache::LockRegister(ARM64Reg host_reg)
{ {
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg); auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
@ -101,6 +114,81 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg)
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end(); return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
} }
// Writes guest GPR `preg` back to its slot in the PPC state (addressed off X29)
// and marks it as flushed. What gets emitted depends on where the value lives:
//   REG_REG   - store the host register, then unlock it
//   REG_IMM   - materialise the immediate in a temporary, store it, unlock the temporary
//   REG_AWAY  - the guest register shares a host register with its neighbour;
//               both guest registers of the pair are flushed together
// Any other type emits nothing.
void Arm64GPRCache::FlushRegister(u32 preg)
{
	OpArg& guest = m_guest_registers[preg];

	switch (guest.GetType())
	{
	case REG_REG:
	{
		ARM64Reg cached = guest.GetReg();
		m_emit->STR(INDEX_UNSIGNED, cached, X29, PPCSTATE_OFF(gpr[preg]));
		Unlock(cached);
		guest.Flush();
		break;
	}
	case REG_IMM:
	{
		// The immediate has to pass through a host register to be stored
		ARM64Reg scratch = GetReg();
		m_emit->MOVI2R(scratch, guest.GetImm());
		m_emit->STR(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(gpr[preg]));
		Unlock(scratch);
		guest.Flush();
		break;
	}
	case REG_AWAY:
	{
		// A register living in the low half is paired with preg + 1,
		// one living in the high half is paired with preg - 1
		const bool in_low_half = guest.GetAwayLocation() == REG_LOW;
		const u32 partner_preg = in_low_half ? preg + 1 : preg - 1;
		OpArg& partner = m_guest_registers[partner_preg];

		ARM64Reg shared_reg = guest.GetAwayReg();
		ARM64Reg own_reg = guest.GetReg();
		ARM64Reg partner_reg = partner.GetReg();

		if (own_reg != INVALID_REG)
		{
			// This guest register has a host register of its own, store that
			m_emit->STR(INDEX_UNSIGNED, own_reg, X29, PPCSTATE_OFF(gpr[preg]));
			Unlock(own_reg);
		}
		else if (partner_reg != INVALID_REG)
		{
			// We never loaded this register, but the partner has its own
			// host register. Only the partner gets stored.
			m_emit->STR(INDEX_UNSIGNED, partner_reg, X29, PPCSTATE_OFF(gpr[partner_preg]));
			Unlock(partner_reg);
		}
		else
		{
			// Neither half was ever moved out of the shared register.
			// This can happen when register state had to be flushed, or an
			// interpreted instruction was hit before the values were used.
			// One store of the shared host register, placed at the lower
			// guest index of the pair, is meant to write both values in one go.
			const u32 pair_base = in_low_half ? preg : partner_preg;
			m_emit->STR(INDEX_UNSIGNED, shared_reg, X29, PPCSTATE_OFF(gpr[pair_base]));
		}

		// Both guest registers of the pair are flushed and the shared register released
		guest.Flush();
		partner.Flush();
		Unlock(DecodeReg(shared_reg));
		break;
	}
	default:
		// Nothing to write back for any other register state
		break;
	}
}
void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{ {
for (int i = 0; i < 32; ++i) for (int i = 0; i < 32; ++i)
@ -124,26 +212,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
// Has to be flushed if it isn't in a callee saved register // Has to be flushed if it isn't in a callee saved register
ARM64Reg host_reg = m_guest_registers[i].GetReg(); ARM64Reg host_reg = m_guest_registers[i].GetReg();
if (flush || !IsCalleeSaved(host_reg)) if (flush || !IsCalleeSaved(host_reg))
{ FlushRegister(i);
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
Unlock(host_reg);
m_guest_registers[i].Flush();
}
} }
else if (m_guest_registers[i].GetType() == REG_IMM) else if (m_guest_registers[i].GetType() == REG_IMM)
{ {
if (flush) if (flush)
{ FlushRegister(i);
ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, m_guest_registers[i].GetImm());
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
Unlock(host_reg);
m_guest_registers[i].Flush();
}
} }
else if (m_guest_registers[i].GetType() == REG_AWAY) else if (m_guest_registers[i].GetType() == REG_AWAY)
{ {
@ -173,39 +247,7 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
!IsCalleeSaved(host_reg_1) || !IsCalleeSaved(host_reg_1) ||
!IsCalleeSaved(host_reg_2)) !IsCalleeSaved(host_reg_2))
{ {
FlushRegister(i); // Will flush both pairs of registers
if (host_reg_1 == INVALID_REG)
{
// We never loaded this register
// We've got to test the state of our shared register
// Currently it is always reg+1
if (host_reg_2 == INVALID_REG)
{
// We didn't load either of these registers
// This can happen in cases where we had to flush register state
// or if we hit an interpreted instruction before we could use it
// Dump the whole thing in one go and flush both registers
// 64bit host register will store 2 32bit store registers in one go
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
}
else
{
// Alright, bottom register isn't used, but top one is
// Only store the top one
m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[i + 1]));
Unlock(host_reg_2);
}
}
else
{
m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[i]));
Unlock(host_reg_1);
}
// Flush both registers
m_guest_registers[i].Flush();
m_guest_registers[i + 1].Flush();
Unlock(DecodeReg(host_reg));
} }
// Skip the next register since we've handled it here // Skip the next register since we've handled it here
++i; ++i;
@ -216,6 +258,9 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
ARM64Reg Arm64GPRCache::R(u32 preg) ARM64Reg Arm64GPRCache::R(u32 preg)
{ {
OpArg& reg = m_guest_registers[preg]; OpArg& reg = m_guest_registers[preg];
IncrementAllUsed();
reg.ResetLastUsed();
switch (reg.GetType()) switch (reg.GetType())
{ {
case REG_REG: // already in a reg case REG_REG: // already in a reg
@ -225,6 +270,8 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
{ {
ARM64Reg host_reg = GetReg(); ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, reg.GetImm()); m_emit->MOVI2R(host_reg, reg.GetImm());
reg.LoadToReg(host_reg);
return host_reg;
} }
break; break;
case REG_AWAY: // Register is away in a shared register case REG_AWAY: // Register is away in a shared register
@ -232,22 +279,57 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
// Let's do the voodoo that we dodo // Let's do the voodoo that we dodo
if (reg.GetReg() == INVALID_REG) if (reg.GetReg() == INVALID_REG)
{ {
// Alright, we need to move to a valid location
ARM64Reg host_reg = GetReg();
reg.LoadAwayToReg(host_reg);
// Alright, we need to extract from our away register // Alright, we need to extract from our away register
// To our new 32bit register // To our new 32bit register
if (reg.GetAwayLocation() == REG_LOW) if (reg.GetAwayLocation() == REG_LOW)
{ {
// We are in the low bits OpArg& upper_reg = m_guest_registers[preg + 1];
// Just move it over to the low bits of the new register if (upper_reg.GetType() == REG_REG)
m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 0, 31); {
// If the upper reg is already moved away, just claim this one as ours now
ARM64Reg host_reg = reg.GetAwayReg();
reg.LoadToReg(DecodeReg(host_reg));
return host_reg;
}
else
{
// Top register is still loaded
// Make sure to move to a new register
ARM64Reg host_reg = GetReg();
ARM64Reg current_reg = reg.GetAwayReg();
reg.LoadToReg(host_reg);
// We are in the low bits
// Just move it over to the low bits of the new register
m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 0, 31);
return host_reg;
}
} }
else else
{ {
// We are in the high bits OpArg& lower_reg = m_guest_registers[preg - 1];
m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 32, 63); if (lower_reg.GetType() == REG_REG)
{
// If the lower register is moved away, claim this one as ours
ARM64Reg host_reg = reg.GetAwayReg();
reg.LoadToReg(DecodeReg(host_reg));
// Make sure to move our register from the high bits to the low bits
m_emit->UBFM(EncodeRegTo64(host_reg), host_reg, 32, 63);
return host_reg;
}
else
{
// Load this register in to the new low bits
// We are no longer away
ARM64Reg host_reg = GetReg();
ARM64Reg current_reg = reg.GetAwayReg();
reg.LoadToReg(host_reg);
// We are in the high bits
m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 32, 63);
return host_reg;
}
} }
} }
else else
@ -259,10 +341,8 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
break; break;
case REG_NOTLOADED: // Register isn't loaded at /all/ case REG_NOTLOADED: // Register isn't loaded at /all/
{ {
// This is kind of annoying, we shouldn't have gotten here // This is a bit annoying. We try to keep these preloaded as much as possible
// This can happen with instructions that use multiple registers(eg lmw) // This can also happen in cases where PPCAnalyst isn't feeding us proper register usage statistics
// The PPCAnalyst needs to be modified to handle these cases
_dbg_assert_msg_(DYNA_REC, false, "Hit REG_NOTLOADED type oparg. Fix the PPCAnalyst");
ARM64Reg host_reg = GetReg(); ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg); reg.LoadToReg(host_reg);
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
@ -277,7 +357,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
return INVALID_REG; return INVALID_REG;
} }
void Arm64GPRCache::GetAllocationOrder(void) void Arm64GPRCache::GetAllocationOrder()
{ {
// Callee saved registers first in hopes that we will keep everything stored there first // Callee saved registers first in hopes that we will keep everything stored there first
const std::vector<ARM64Reg> allocation_order = const std::vector<ARM64Reg> allocation_order =
@ -292,6 +372,24 @@ void Arm64GPRCache::GetAllocationOrder(void)
m_host_registers.push_back(HostReg(reg)); m_host_registers.push_back(HostReg(reg));
} }
void Arm64GPRCache::FlushMostStaleRegister()
{
u32 most_stale_preg = 0;
u32 most_stale_amount = 0;
for (u32 i = 0; i < 32; ++i)
{
u32 last_used = m_guest_registers[i].GetLastUsed();
if (last_used > most_stale_amount &&
m_guest_registers[i].GetType() != REG_IMM &&
m_guest_registers[i].GetType() != REG_NOTLOADED)
{
most_stale_preg = i;
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg);
}
// FPR Cache // FPR Cache
void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{ {
@ -303,7 +401,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg)
// XXX: return a host reg holding a guest register // XXX: return a host reg holding a guest register
} }
void Arm64FPRCache::GetAllocationOrder(void) void Arm64FPRCache::GetAllocationOrder()
{ {
const std::vector<ARM64Reg> allocation_order = const std::vector<ARM64Reg> allocation_order =
{ {
@ -317,3 +415,8 @@ void Arm64FPRCache::GetAllocationOrder(void)
m_host_registers.push_back(HostReg(reg)); m_host_registers.push_back(HostReg(reg));
} }
// FPR counterpart of the stale-register eviction hook.
// Not implemented yet: the body is empty, so calling this frees no host register.
void Arm64FPRCache::FlushMostStaleRegister()
{
// XXX: Flush a register
}

View File

@ -42,10 +42,9 @@ class OpArg
{ {
public: public:
OpArg() OpArg()
: m_type(REG_NOTLOADED), m_reg(INVALID_REG),
m_value(0), m_last_used(0)
{ {
m_type = REG_NOTLOADED;
m_reg = INVALID_REG;
m_value = 0;
} }
RegType GetType() RegType GetType()
@ -73,30 +72,40 @@ public:
{ {
m_type = REG_REG; m_type = REG_REG;
m_reg = reg; m_reg = reg;
m_away_reg = INVALID_REG;
} }
void LoadToAway(ARM64Reg reg, RegLocation location) void LoadToAway(ARM64Reg reg, RegLocation location)
{ {
m_type = REG_AWAY; m_type = REG_AWAY;
m_reg = INVALID_REG;
m_away_reg = reg; m_away_reg = reg;
m_away_location = location; m_away_location = location;
}
void LoadAwayToReg(ARM64Reg reg) m_reg = INVALID_REG;
{
// We are still an away type
// We just are also in another register
m_reg = reg;
} }
void LoadToImm(u32 imm) void LoadToImm(u32 imm)
{ {
m_type = REG_IMM; m_type = REG_IMM;
m_value = imm; m_value = imm;
m_reg = INVALID_REG;
m_away_reg = INVALID_REG;
} }
void Flush() void Flush()
{ {
// Invalidate any previous information
m_type = REG_NOTLOADED; m_type = REG_NOTLOADED;
m_reg = INVALID_REG;
m_away_reg = INVALID_REG;
// Arbitrarily large value that won't roll over on a lot of increments
m_last_used = 0xFFFF;
} }
// Staleness counter accessors

// Current value of the counter
u32 GetLastUsed()
{
	return m_last_used;
}

// Sets the counter back to zero
void ResetLastUsed()
{
	m_last_used = 0;
}

// Advances the counter by one
void IncrementLastUsed()
{
	++m_last_used;
}
private: private:
// For REG_REG // For REG_REG
RegType m_type; // store type RegType m_type; // store type
@ -110,6 +119,8 @@ private:
// For REG_IMM // For REG_IMM
u32 m_value; // IMM value u32 m_value; // IMM value
u32 m_last_used;
}; };
class HostReg class HostReg
@ -117,10 +128,10 @@ class HostReg
public: public:
HostReg() : m_reg(INVALID_REG), m_locked(false) {} HostReg() : m_reg(INVALID_REG), m_locked(false) {}
HostReg(ARM64Reg reg) : m_reg(reg), m_locked(false) {} HostReg(ARM64Reg reg) : m_reg(reg), m_locked(false) {}
bool IsLocked(void) { return m_locked; } bool IsLocked() { return m_locked; }
void Lock(void) { m_locked = true; } void Lock() { m_locked = true; }
void Unlock(void) { m_locked = false; } void Unlock() { m_locked = false; }
ARM64Reg GetReg(void) { return m_reg; } ARM64Reg GetReg() { return m_reg; }
bool operator==(const ARM64Reg& reg) bool operator==(const ARM64Reg& reg)
{ {
@ -135,7 +146,7 @@ private:
class Arm64RegCache class Arm64RegCache
{ {
public: public:
Arm64RegCache(void) : m_emit(nullptr), m_reg_stats(nullptr) {}; Arm64RegCache() : m_emit(nullptr), m_reg_stats(nullptr) {};
virtual ~Arm64RegCache() {}; virtual ~Arm64RegCache() {};
void Init(ARM64XEmitter *emitter); void Init(ARM64XEmitter *emitter);
@ -151,7 +162,7 @@ public:
// Returns a temporary register for use // Returns a temporary register for use
// Requires unlocking after done // Requires unlocking after done
ARM64Reg GetReg(void); ARM64Reg GetReg();
// Locks a register so a cache cannot use it // Locks a register so a cache cannot use it
// Useful for function calls // Useful for function calls
@ -177,7 +188,10 @@ public:
protected: protected:
// Get the order of the host registers // Get the order of the host registers
virtual void GetAllocationOrder(void) = 0; virtual void GetAllocationOrder() = 0;
// Flushes the most stale register
virtual void FlushMostStaleRegister() = 0;
// Lock a register // Lock a register
void LockRegister(ARM64Reg host_reg); void LockRegister(ARM64Reg host_reg);
@ -185,6 +199,9 @@ protected:
// Unlock a register // Unlock a register
void UnlockRegister(ARM64Reg host_reg); void UnlockRegister(ARM64Reg host_reg);
// Get available host registers
u32 GetUnlockedRegisterCount();
// Code emitter // Code emitter
ARM64XEmitter *m_emit; ARM64XEmitter *m_emit;
@ -220,7 +237,10 @@ public:
protected: protected:
// Get the order of the host registers // Get the order of the host registers
void GetAllocationOrder(void); void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Our guest GPRs // Our guest GPRs
// PowerPC has 32 GPRs // PowerPC has 32 GPRs
@ -228,6 +248,14 @@ protected:
private: private:
bool IsCalleeSaved(ARM64Reg reg); bool IsCalleeSaved(ARM64Reg reg);
void IncrementAllUsed()
{
for (auto& reg : m_guest_registers)
reg.IncrementLastUsed();
}
void FlushRegister(u32 preg);
}; };
class Arm64FPRCache : public Arm64RegCache class Arm64FPRCache : public Arm64RegCache
@ -243,7 +271,10 @@ public:
protected: protected:
// Get the order of the host registers // Get the order of the host registers
void GetAllocationOrder(void); void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Our guest FPRs // Our guest FPRs
// Gekko has 32 paired registers(32x2) // Gekko has 32 paired registers(32x2)