Merge pull request #5326 from MerryMage/cr_cache
JitArm64: Implement CR cache
This commit is contained in:
commit
acba6a0d5f
|
@ -215,7 +215,6 @@ private:
|
|||
// AsmRoutines
|
||||
void GenerateAsm();
|
||||
void GenerateCommonAsm();
|
||||
void GenMfcr();
|
||||
|
||||
// Profiling
|
||||
void BeginTimeProfile(JitBlock* b);
|
||||
|
|
|
@ -268,8 +268,8 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
|||
ARM64Reg VA = reg_encoder(fpr.R(a, type));
|
||||
ARM64Reg VB = reg_encoder(fpr.R(b, type));
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
ARM64Reg XA = gpr.CR(crf);
|
||||
|
||||
FixupBranch pNaN, pLesser, pGreater;
|
||||
FixupBranch continue1, continue2, continue3;
|
||||
|
@ -312,10 +312,6 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
|||
SetJumpTarget(continue3);
|
||||
}
|
||||
SetJumpTarget(continue1);
|
||||
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||
|
|
|
@ -17,33 +17,23 @@ using namespace Arm64Gen;
|
|||
|
||||
void JitArm64::ComputeRC(ARM64Reg reg, int crf, bool needs_sext)
|
||||
{
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
if (needs_sext)
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
|
||||
SXTW(XA, reg);
|
||||
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
gpr.Unlock(WA);
|
||||
SXTW(gpr.CR(crf), reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
STR(INDEX_UNSIGNED, EncodeRegTo64(reg), PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
MOV(gpr.CR(crf), EncodeRegTo64(reg));
|
||||
}
|
||||
}
|
||||
|
||||
void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext)
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
|
||||
MOVI2R(XA, imm);
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
MOVI2R(gpr.CR(crf), imm);
|
||||
if (imm & 0x80000000 && needs_sext)
|
||||
SXTW(XA, WA);
|
||||
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
gpr.Unlock(WA);
|
||||
SXTW(gpr.CR(crf), DecodeReg(gpr.CR(crf)));
|
||||
}
|
||||
|
||||
void JitArm64::ComputeCarry(bool Carry)
|
||||
|
@ -425,19 +415,18 @@ void JitArm64::cmp(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
ARM64Reg XB = EncodeRegTo64(WB);
|
||||
ARM64Reg RA = gpr.R(a);
|
||||
ARM64Reg RB = gpr.R(b);
|
||||
ARM64Reg CR = gpr.CR(crf);
|
||||
|
||||
SXTW(XA, RA);
|
||||
SXTW(XB, RB);
|
||||
SXTW(CR, RB);
|
||||
SUB(CR, XA, CR);
|
||||
|
||||
SUB(XA, XA, XB);
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
|
||||
gpr.Unlock(WA, WB);
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
void JitArm64::cmpl(UGeckoInstruction inst)
|
||||
|
@ -459,11 +448,8 @@ void JitArm64::cmpl(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
gpr.Unlock(WA);
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
SUB(gpr.CR(crf), EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
|
||||
}
|
||||
|
||||
void JitArm64::cmpi(UGeckoInstruction inst)
|
||||
|
@ -507,13 +493,10 @@ void JitArm64::cmpli(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
gpr.BindCRToRegister(crf, false);
|
||||
ARM64Reg XA = gpr.CR(crf);
|
||||
|
||||
SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA);
|
||||
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
void JitArm64::rlwinmx(UGeckoInstruction inst)
|
||||
|
|
|
@ -92,6 +92,15 @@ void Arm64RegCache::FlushMostStaleRegister()
|
|||
}
|
||||
|
||||
// GPR Cache
|
||||
constexpr size_t GUEST_GPR_COUNT = 32;
|
||||
constexpr size_t GUEST_CR_COUNT = 8;
|
||||
constexpr size_t GUEST_GPR_OFFSET = 0;
|
||||
constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT;
|
||||
|
||||
// Sizes the guest register table passed to the base cache so that it holds
// GUEST_GPR_COUNT GPR slots followed by GUEST_CR_COUNT CR slots
// (GUEST_CR_OFFSET == GUEST_GPR_COUNT).
Arm64GPRCache::Arm64GPRCache() : Arm64RegCache(GUEST_GPR_COUNT + GUEST_CR_COUNT)
|
||||
{
|
||||
}
|
||||
|
||||
void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats)
|
||||
{
|
||||
}
|
||||
|
@ -105,18 +114,48 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg)
|
|||
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
|
||||
}
|
||||
|
||||
void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||
const OpArg& Arm64GPRCache::GetGuestGPROpArg(size_t preg) const
|
||||
{
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
_assert_(preg < GUEST_GPR_COUNT);
|
||||
return m_guest_registers[preg];
|
||||
}
|
||||
|
||||
// Builds the descriptor for guest GPR `preg`: a 32-bit value stored at
// PPCSTATE_OFF(gpr[preg]) and cached in slot GUEST_GPR_OFFSET + preg.
// `preg` must be below GUEST_GPR_COUNT.
Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestGPR(size_t preg)
{
  _assert_(preg < GUEST_GPR_COUNT);

  OpArg& slot = m_guest_registers[GUEST_GPR_OFFSET + preg];
  return GuestRegInfo{32, PPCSTATE_OFF(gpr[preg]), slot};
}
|
||||
|
||||
// Builds the descriptor for guest CR field `preg`: a 64-bit value stored at
// PPCSTATE_OFF(cr_val[preg]) and cached in slot GUEST_CR_OFFSET + preg.
// `preg` must be below GUEST_CR_COUNT.
Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestCR(size_t preg)
{
  _assert_(preg < GUEST_CR_COUNT);

  OpArg& slot = m_guest_registers[GUEST_CR_OFFSET + preg];
  return GuestRegInfo{64, PPCSTATE_OFF(cr_val[preg]), slot};
}
|
||||
|
||||
// Maps a flat index into m_guest_registers back to its guest register
// descriptor. Indices in [GUEST_GPR_OFFSET, GUEST_GPR_OFFSET + GUEST_GPR_COUNT)
// are GPRs; indices in [GUEST_CR_OFFSET, GUEST_CR_OFFSET + GUEST_CR_COUNT) are
// CR fields. Any other index is a caller bug and trips the assert.
Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestByIndex(size_t index)
{
  if (index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT)
    return GetGuestGPR(index - GUEST_GPR_OFFSET);
  if (index >= GUEST_CR_OFFSET && index < GUEST_CR_OFFSET + GUEST_CR_COUNT)
    return GetGuestCR(index - GUEST_CR_OFFSET);

  _assert_msg_(DYNA_REC, false, "Invalid index for guest register");
  // The assert may be compiled out or be non-fatal; flowing off the end of a
  // non-void function is undefined behaviour, so return a valid descriptor.
  return GetGuestGPR(0);
}
|
||||
|
||||
void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
|
||||
{
|
||||
GuestRegInfo guest_reg = GetGuestByIndex(index);
|
||||
OpArg& reg = guest_reg.reg;
|
||||
size_t bitsize = guest_reg.bitsize;
|
||||
|
||||
if (reg.GetType() == REG_REG)
|
||||
{
|
||||
ARM64Reg host_reg = reg.GetReg();
|
||||
if (reg.IsDirty())
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
||||
|
||||
if (!maintain_state)
|
||||
{
|
||||
UnlockRegister(host_reg);
|
||||
UnlockRegister(DecodeReg(host_reg));
|
||||
reg.Flush();
|
||||
}
|
||||
}
|
||||
|
@ -124,16 +163,16 @@ void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state)
|
|||
{
|
||||
if (!reg.GetImm())
|
||||
{
|
||||
m_emit->STR(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(gpr[preg]));
|
||||
m_emit->STR(INDEX_UNSIGNED, bitsize == 64 ? ZR : WZR, PPC_REG, guest_reg.ppc_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||
|
||||
m_emit->MOVI2R(host_reg, reg.GetImm());
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
||||
|
||||
UnlockRegister(host_reg);
|
||||
UnlockRegister(DecodeReg(host_reg));
|
||||
}
|
||||
|
||||
if (!maintain_state)
|
||||
|
@ -143,11 +182,11 @@ void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state)
|
|||
|
||||
void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
||||
{
|
||||
for (size_t i = 0; i < m_guest_registers.size(); ++i)
|
||||
for (size_t i = 0; i < GUEST_GPR_COUNT; ++i)
|
||||
{
|
||||
if (regs[i])
|
||||
{
|
||||
if (i < 31 && regs[i + 1])
|
||||
if (i + 1 < GUEST_GPR_COUNT && regs[i + 1])
|
||||
{
|
||||
// We've got two guest registers in a row to store
|
||||
OpArg& reg1 = m_guest_registers[i];
|
||||
|
@ -155,14 +194,14 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
|||
if (reg1.IsDirty() && reg2.IsDirty() && reg1.GetType() == REG_REG &&
|
||||
reg2.GetType() == REG_REG)
|
||||
{
|
||||
ARM64Reg RX1 = R(i);
|
||||
ARM64Reg RX2 = R(i + 1);
|
||||
|
||||
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32));
|
||||
size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
|
||||
ARM64Reg RX1 = R(GetGuestByIndex(i));
|
||||
ARM64Reg RX2 = R(GetGuestByIndex(i + 1));
|
||||
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, ppc_offset);
|
||||
if (!maintain_state)
|
||||
{
|
||||
UnlockRegister(RX1);
|
||||
UnlockRegister(RX2);
|
||||
UnlockRegister(DecodeReg(RX1));
|
||||
UnlockRegister(DecodeReg(RX2));
|
||||
reg1.Flush();
|
||||
reg2.Flush();
|
||||
}
|
||||
|
@ -171,7 +210,18 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
|||
}
|
||||
}
|
||||
|
||||
FlushRegister(i, maintain_state);
|
||||
FlushRegister(GUEST_GPR_OFFSET + i, maintain_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flushes every guest CR field whose bit is set in `regs`; bit i selects CR
// field i. `maintain_state` is forwarded unchanged to FlushRegister.
void Arm64GPRCache::FlushCRRegisters(BitSet32 regs, bool maintain_state)
{
  for (size_t cr = 0; cr != GUEST_CR_COUNT; ++cr)
  {
    if (!regs[cr])
      continue;

    FlushRegister(GUEST_CR_OFFSET + cr, maintain_state);
  }
}
|
||||
|
@ -179,7 +229,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
|||
void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
{
|
||||
BitSet32 to_flush;
|
||||
for (size_t i = 0; i < m_guest_registers.size(); ++i)
|
||||
for (size_t i = 0; i < GUEST_GPR_COUNT; ++i)
|
||||
{
|
||||
bool flush = true;
|
||||
if (m_guest_registers[i].GetType() == REG_REG)
|
||||
|
@ -192,11 +242,14 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
|||
to_flush[i] = flush;
|
||||
}
|
||||
FlushRegisters(to_flush, mode == FLUSH_MAINTAIN_STATE);
|
||||
FlushCRRegisters(BitSet32(~0U), mode == FLUSH_MAINTAIN_STATE);
|
||||
}
|
||||
|
||||
ARM64Reg Arm64GPRCache::R(size_t preg)
|
||||
ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
|
||||
{
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
OpArg& reg = guest_reg.reg;
|
||||
size_t bitsize = guest_reg.bitsize;
|
||||
|
||||
IncrementAllUsed();
|
||||
reg.ResetLastUsed();
|
||||
|
||||
|
@ -207,7 +260,7 @@ ARM64Reg Arm64GPRCache::R(size_t preg)
|
|||
break;
|
||||
case REG_IMM: // Is an immediate
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||
m_emit->MOVI2R(host_reg, reg.GetImm());
|
||||
reg.Load(host_reg);
|
||||
reg.SetDirty(true);
|
||||
|
@ -219,10 +272,10 @@ ARM64Reg Arm64GPRCache::R(size_t preg)
|
|||
// This is a bit annoying. We try to keep these preloaded as much as possible
|
||||
// This can also happen in cases where PPCAnalyst isn't feeding us proper register usage
|
||||
// statistics
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||
reg.Load(host_reg);
|
||||
reg.SetDirty(false);
|
||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
|
||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
||||
return host_reg;
|
||||
}
|
||||
break;
|
||||
|
@ -234,27 +287,28 @@ ARM64Reg Arm64GPRCache::R(size_t preg)
|
|||
return INVALID_REG;
|
||||
}
|
||||
|
||||
void Arm64GPRCache::SetImmediate(size_t preg, u32 imm)
|
||||
void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm)
|
||||
{
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
OpArg& reg = guest_reg.reg;
|
||||
if (reg.GetType() == REG_REG)
|
||||
UnlockRegister(reg.GetReg());
|
||||
UnlockRegister(DecodeReg(reg.GetReg()));
|
||||
reg.LoadToImm(imm);
|
||||
}
|
||||
|
||||
void Arm64GPRCache::BindToRegister(size_t preg, bool do_load)
|
||||
void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load)
|
||||
{
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
OpArg& reg = guest_reg.reg;
|
||||
size_t bitsize = guest_reg.bitsize;
|
||||
|
||||
reg.ResetLastUsed();
|
||||
|
||||
reg.SetDirty(true);
|
||||
if (reg.GetType() == REG_NOTLOADED)
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||
reg.Load(host_reg);
|
||||
if (do_load)
|
||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
|
||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -278,16 +332,17 @@ BitSet32 Arm64GPRCache::GetCallerSavedUsed()
|
|||
BitSet32 registers(0);
|
||||
for (auto& it : m_host_registers)
|
||||
if (it.IsLocked() && !IsCalleeSaved(it.GetReg()))
|
||||
registers[it.GetReg()] = 1;
|
||||
registers[DecodeReg(it.GetReg())] = 1;
|
||||
return registers;
|
||||
}
|
||||
|
||||
void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
|
||||
{
|
||||
host_reg = DecodeReg(host_reg);
|
||||
for (size_t i = 0; i < m_guest_registers.size(); ++i)
|
||||
{
|
||||
const OpArg& reg = m_guest_registers[i];
|
||||
if (reg.GetType() == REG_REG && reg.GetReg() == host_reg)
|
||||
if (reg.GetType() == REG_REG && DecodeReg(reg.GetReg()) == host_reg)
|
||||
{
|
||||
FlushRegister(i, false);
|
||||
return;
|
||||
|
@ -296,6 +351,12 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
|
|||
}
|
||||
|
||||
// FPR Cache
|
||||
constexpr size_t GUEST_FPR_COUNT = 32;
|
||||
|
||||
// Sizes the guest register table passed to the base cache with
// GUEST_FPR_COUNT entries.
Arm64FPRCache::Arm64FPRCache() : Arm64RegCache(GUEST_FPR_COUNT)
|
||||
{
|
||||
}
|
||||
|
||||
void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
{
|
||||
for (size_t i = 0; i < m_guest_registers.size(); ++i)
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
@ -118,7 +117,9 @@ private:
|
|||
class Arm64RegCache
|
||||
{
|
||||
public:
|
||||
Arm64RegCache() : m_emit(nullptr), m_float_emit(nullptr), m_reg_stats(nullptr){};
|
||||
explicit Arm64RegCache(size_t guest_reg_count)
|
||||
: m_emit(nullptr), m_float_emit(nullptr), m_guest_registers(guest_reg_count),
|
||||
m_reg_stats(nullptr){};
|
||||
virtual ~Arm64RegCache(){};
|
||||
|
||||
void Init(ARM64XEmitter* emitter);
|
||||
|
@ -133,7 +134,6 @@ public:
|
|||
// Requires unlocking after done
|
||||
ARM64Reg GetReg();
|
||||
|
||||
void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); }
|
||||
// Locks a register so a cache cannot use it
|
||||
// Useful for function calls
|
||||
template <typename T = ARM64Reg, typename... Args>
|
||||
|
@ -176,8 +176,6 @@ protected:
|
|||
|
||||
virtual void FlushRegister(size_t preg, bool maintain_state) = 0;
|
||||
|
||||
virtual void FlushRegisters(BitSet32 regs, bool maintain_state) = 0;
|
||||
|
||||
// Get available host registers
|
||||
u32 GetUnlockedRegisterCount();
|
||||
|
||||
|
@ -197,9 +195,9 @@ protected:
|
|||
std::vector<HostReg> m_host_registers;
|
||||
|
||||
// Our guest GPRs
|
||||
// PowerPC has 32 GPRs
|
||||
// PowerPC has 32 GPRs and 8 CRs
|
||||
// PowerPC also has 32 paired FPRs
|
||||
std::array<OpArg, 32> m_guest_registers;
|
||||
std::vector<OpArg> m_guest_registers;
|
||||
|
||||
// Register stats for the current block
|
||||
PPCAnalyst::BlockRegStats* m_reg_stats;
|
||||
|
@ -208,27 +206,32 @@ protected:
|
|||
class Arm64GPRCache : public Arm64RegCache
|
||||
{
|
||||
public:
|
||||
Arm64GPRCache();
|
||||
~Arm64GPRCache() {}
|
||||
void Start(PPCAnalyst::BlockRegStats& stats) override;
|
||||
|
||||
// Flushes the register cache in different ways depending on the mode
|
||||
void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr) override;
|
||||
|
||||
// Returns a guest register inside of a host register
|
||||
// Returns a guest GPR inside of a host register
|
||||
// Will dump an immediate to the host register as well
|
||||
ARM64Reg R(size_t preg);
|
||||
|
||||
// Set a register to an immediate
|
||||
void SetImmediate(size_t preg, u32 imm);
|
||||
|
||||
// Returns if a register is set as an immediate
|
||||
bool IsImm(size_t reg) const { return m_guest_registers[reg].GetType() == REG_IMM; }
|
||||
// Gets the immediate that a register is set to
|
||||
u32 GetImm(size_t reg) const { return m_guest_registers[reg].GetImm(); }
|
||||
void BindToRegister(size_t preg, bool do_load);
|
||||
|
||||
ARM64Reg R(size_t preg) { return R(GetGuestGPR(preg)); }
|
||||
// Returns a guest CR inside of a host register
|
||||
ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); }
|
||||
// Set a register to an immediate, only valid for guest GPRs
|
||||
void SetImmediate(size_t preg, u32 imm) { SetImmediate(GetGuestGPR(preg), imm); }
|
||||
// Returns if a register is set as an immediate, only valid for guest GPRs
|
||||
bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == REG_IMM; }
|
||||
// Gets the immediate that a register is set to, only valid for guest GPRs
|
||||
u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); }
|
||||
// Binds a guest GPR to a host register, optionally loading its value
|
||||
void BindToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestGPR(preg), do_load); }
|
||||
// Binds a guest CR to a host register, optionally loading its value
|
||||
void BindCRToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestCR(preg), do_load); }
|
||||
BitSet32 GetCallerSavedUsed() override;
|
||||
|
||||
void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); }
|
||||
void StoreCRRegisters(BitSet32 regs) { FlushCRRegisters(regs, false); }
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
void GetAllocationOrder() override;
|
||||
|
@ -236,17 +239,35 @@ protected:
|
|||
// Flushes a guest register by host provided
|
||||
void FlushByHost(ARM64Reg host_reg) override;
|
||||
|
||||
void FlushRegister(size_t preg, bool maintain_state) override;
|
||||
|
||||
void FlushRegisters(BitSet32 regs, bool maintain_state) override;
|
||||
void FlushRegister(size_t index, bool maintain_state) override;
|
||||
|
||||
private:
|
||||
bool IsCalleeSaved(ARM64Reg reg);
|
||||
|
||||
// Describes one cached guest register (a GPR or a CR field).
// It is brace-initialised in field order by GetGuestGPR/GetGuestCR, so the
// order of the members must not change.
struct GuestRegInfo
|
||||
{
|
||||
// Width of the guest value in bits: 32 for GPRs, 64 for CR fields.
size_t bitsize;
|
||||
// PPCSTATE_OFF(...) offset used with PPC_REG when loading/storing the value.
size_t ppc_offset;
|
||||
// The cache slot in m_guest_registers that tracks this guest register.
OpArg& reg;
|
||||
};
|
||||
|
||||
const OpArg& GetGuestGPROpArg(size_t preg) const;
|
||||
GuestRegInfo GetGuestGPR(size_t preg);
|
||||
GuestRegInfo GetGuestCR(size_t preg);
|
||||
GuestRegInfo GetGuestByIndex(size_t index);
|
||||
|
||||
ARM64Reg R(const GuestRegInfo& guest_reg);
|
||||
void SetImmediate(const GuestRegInfo& guest_reg, u32 imm);
|
||||
void BindToRegister(const GuestRegInfo& guest_reg, bool do_load);
|
||||
|
||||
void FlushRegisters(BitSet32 regs, bool maintain_state);
|
||||
void FlushCRRegisters(BitSet32 regs, bool maintain_state);
|
||||
};
|
||||
|
||||
class Arm64FPRCache : public Arm64RegCache
|
||||
{
|
||||
public:
|
||||
Arm64FPRCache();
|
||||
~Arm64FPRCache() {}
|
||||
// Flushes the register cache in different ways depending on the mode
|
||||
void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr) override;
|
||||
|
@ -263,6 +284,7 @@ public:
|
|||
|
||||
void FixSinglePrecision(size_t preg);
|
||||
|
||||
void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); }
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
void GetAllocationOrder() override;
|
||||
|
@ -272,8 +294,8 @@ protected:
|
|||
|
||||
void FlushRegister(size_t preg, bool maintain_state) override;
|
||||
|
||||
void FlushRegisters(BitSet32 regs, bool maintain_state) override;
|
||||
|
||||
private:
|
||||
bool IsCalleeSaved(ARM64Reg reg);
|
||||
|
||||
void FlushRegisters(BitSet32 regs, bool maintain_state);
|
||||
};
|
||||
|
|
|
@ -14,35 +14,23 @@
|
|||
|
||||
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
ARM64Reg XA = gpr.CR(field);
|
||||
ARM64Reg WA = DecodeReg(XA);
|
||||
|
||||
FixupBranch branch;
|
||||
switch (bit)
|
||||
{
|
||||
case CR_SO_BIT: // check bit 61 set
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61);
|
||||
break;
|
||||
return jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61);
|
||||
case CR_EQ_BIT: // check bits 31-0 == 0
|
||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
branch = jump_if_set ? CBZ(WA) : CBNZ(WA);
|
||||
break;
|
||||
return jump_if_set ? CBZ(WA) : CBNZ(WA);
|
||||
case CR_GT_BIT: // check val > 0
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
CMP(XA, SP);
|
||||
branch = B(jump_if_set ? CC_GT : CC_LE);
|
||||
break;
|
||||
return B(jump_if_set ? CC_GT : CC_LE);
|
||||
case CR_LT_BIT: // check bit 62 set
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
|
||||
break;
|
||||
return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
|
||||
}
|
||||
|
||||
gpr.Unlock(WA);
|
||||
return branch;
|
||||
}
|
||||
|
||||
void JitArm64::mtmsr(UGeckoInstruction inst)
|
||||
|
@ -79,11 +67,8 @@ void JitArm64::mcrf(UGeckoInstruction inst)
|
|||
|
||||
if (inst.CRFS != inst.CRFD)
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFS]));
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));
|
||||
gpr.Unlock(WA);
|
||||
gpr.BindCRToRegister(inst.CRFD, false);
|
||||
MOV(gpr.CR(inst.CRFD), gpr.CR(inst.CRFS));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,10 +77,11 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
|
||||
gpr.BindCRToRegister(inst.CRFD, false);
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
ARM64Reg XB = EncodeRegTo64(WB);
|
||||
ARM64Reg XB = gpr.CR(inst.CRFD);
|
||||
ARM64Reg WB = DecodeReg(XB);
|
||||
|
||||
// Copy XER[0-3] into CR[inst.CRFD]
|
||||
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
|
||||
|
@ -108,13 +94,12 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
|
|||
|
||||
MOVP2R(XB, m_crTable.data());
|
||||
LDR(XB, XB, XA);
|
||||
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));
|
||||
|
||||
// Clear XER[0-3]
|
||||
STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
|
||||
STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));
|
||||
|
||||
gpr.Unlock(WA, WB);
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
void JitArm64::mfsr(UGeckoInstruction inst)
|
||||
|
@ -435,9 +420,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
int field = inst.CRBD >> 2;
|
||||
int bit = 3 - (inst.CRBD & 3);
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
gpr.BindCRToRegister(field, true);
|
||||
ARM64Reg XA = gpr.CR(field);
|
||||
switch (bit)
|
||||
{
|
||||
case CR_SO_BIT:
|
||||
|
@ -456,8 +440,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62)
|
||||
break;
|
||||
}
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
gpr.Unlock(WA);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -468,9 +450,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
int field = inst.CRBD >> 2;
|
||||
int bit = 3 - (inst.CRBD & 3);
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
gpr.BindCRToRegister(field, true);
|
||||
ARM64Reg XA = gpr.CR(field);
|
||||
|
||||
if (bit != CR_GT_BIT)
|
||||
{
|
||||
|
@ -502,9 +483,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
ORR(XA, XA, 32, 0, true); // XA | 1<<32
|
||||
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
gpr.Unlock(WA);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -527,9 +505,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
ARM64Reg out = i ? XB : XA;
|
||||
bool negate = i ? negateB : negateA;
|
||||
|
||||
ARM64Reg WC = gpr.GetReg();
|
||||
ARM64Reg XC = EncodeRegTo64(WC);
|
||||
LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
ARM64Reg XC = gpr.CR(field);
|
||||
ARM64Reg WC = DecodeReg(XC);
|
||||
switch (bit)
|
||||
{
|
||||
case CR_SO_BIT: // check bit 61 set
|
||||
|
@ -557,7 +534,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
default:
|
||||
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
|
||||
}
|
||||
gpr.Unlock(WC);
|
||||
}
|
||||
|
||||
// Compute combined bit
|
||||
|
@ -585,7 +561,10 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
int field = inst.CRBD >> 2;
|
||||
int bit = 3 - (inst.CRBD & 3);
|
||||
|
||||
LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
gpr.Unlock(WB);
|
||||
WB = INVALID_REG;
|
||||
gpr.BindCRToRegister(field, true);
|
||||
XB = gpr.CR(field);
|
||||
|
||||
// Gross but necessary; if the input is totally zero and we set SO or LT,
|
||||
// or even just add the (1<<32), GT will suddenly end up set without us
|
||||
|
@ -623,10 +602,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
ORR(XB, XB, 32, 0, true); // XB | 1<<32
|
||||
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
|
||||
void JitArm64::mfcr(UGeckoInstruction inst)
|
||||
|
@ -634,14 +611,44 @@ void JitArm64::mfcr(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
|
||||
gpr.Lock(W0, W1, W2, W30);
|
||||
BL(GetAsmRoutines()->mfcr);
|
||||
gpr.Unlock(W1, W2, W30);
|
||||
|
||||
gpr.BindToRegister(inst.RD, false);
|
||||
MOV(gpr.R(inst.RD), W0);
|
||||
ARM64Reg WA = gpr.R(inst.RD);
|
||||
ARM64Reg WC = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
ARM64Reg XC = EncodeRegTo64(WC);
|
||||
|
||||
gpr.Unlock(W0);
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
ARM64Reg CR = gpr.CR(i);
|
||||
ARM64Reg WCR = DecodeReg(CR);
|
||||
|
||||
// SO
|
||||
if (i == 0)
|
||||
{
|
||||
UBFX(XA, CR, 61, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
UBFX(XC, CR, 61, 1);
|
||||
ORR(XA, XC, XA, ArithOption(XA, ST_LSL, 4));
|
||||
}
|
||||
|
||||
// EQ
|
||||
ORR(WC, WA, 32 - 1, 0); // WA | 1<<1
|
||||
CMP(WCR, WZR);
|
||||
CSEL(WA, WC, WA, CC_EQ);
|
||||
|
||||
// GT
|
||||
ORR(WC, WA, 32 - 2, 0); // WA | 1<<2
|
||||
CMP(CR, ZR);
|
||||
CSEL(WA, WC, WA, CC_GT);
|
||||
|
||||
// LT
|
||||
UBFX(XC, CR, 62, 1);
|
||||
ORR(WA, WA, WC, ArithOption(WC, ST_LSL, 3));
|
||||
}
|
||||
|
||||
gpr.Unlock(WC);
|
||||
}
|
||||
|
||||
void JitArm64::mtcrf(UGeckoInstruction inst)
|
||||
|
@ -653,8 +660,6 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
|
|||
if (crm != 0)
|
||||
{
|
||||
ARM64Reg RS = gpr.R(inst.RS);
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
ARM64Reg XB = EncodeRegTo64(WB);
|
||||
MOVP2R(XB, m_crTable.data());
|
||||
|
@ -662,20 +667,23 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
|
|||
{
|
||||
if ((crm & (0x80 >> i)) != 0)
|
||||
{
|
||||
gpr.BindCRToRegister(i, false);
|
||||
ARM64Reg CR = gpr.CR(i);
|
||||
ARM64Reg WCR = DecodeReg(CR);
|
||||
|
||||
if (i != 7)
|
||||
LSR(WA, RS, 28 - i * 4);
|
||||
LSR(WCR, RS, 28 - i * 4);
|
||||
if (i != 0)
|
||||
{
|
||||
if (i != 7)
|
||||
UBFX(WA, WA, 0, 4);
|
||||
UBFX(WCR, WCR, 0, 4);
|
||||
else
|
||||
UBFX(WA, RS, 0, 4);
|
||||
UBFX(WCR, RS, 0, 4);
|
||||
}
|
||||
|
||||
LDR(XA, XB, ArithOption(XA, true));
|
||||
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[i]));
|
||||
LDR(CR, XB, ArithOption(CR, true));
|
||||
}
|
||||
}
|
||||
gpr.Unlock(WA, WB);
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -651,47 +651,5 @@ void JitArm64::GenerateCommonAsm()
|
|||
pairedStoreQuantized[30] = storeSingleS8Slow;
|
||||
pairedStoreQuantized[31] = storeSingleS16Slow;
|
||||
|
||||
GetAsmRoutines()->mfcr = AlignCode16();
|
||||
GenMfcr();
|
||||
}
|
||||
|
||||
void JitArm64::GenMfcr()
|
||||
{
|
||||
// Input: Nothing
|
||||
// Returns: W0
|
||||
// Clobbers: X1, X2
|
||||
const u8* start = GetCodePtr();
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val[i]));
|
||||
|
||||
// SO
|
||||
if (i == 0)
|
||||
{
|
||||
UBFX(X0, X1, 61, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ORR(W0, WZR, W0, ArithOption(W0, ST_LSL, 4));
|
||||
UBFX(X2, X1, 61, 1);
|
||||
ORR(X0, X0, X2);
|
||||
}
|
||||
|
||||
// EQ
|
||||
ORR(W2, W0, 32 - 1, 0); // W0 | 1<<1
|
||||
CMP(W1, WZR);
|
||||
CSEL(W0, W2, W0, CC_EQ);
|
||||
|
||||
// GT
|
||||
ORR(W2, W0, 32 - 2, 0); // W0 | 1<<2
|
||||
CMP(X1, ZR);
|
||||
CSEL(W0, W2, W0, CC_GT);
|
||||
|
||||
// LT
|
||||
UBFX(X2, X1, 62, 1);
|
||||
ORR(W0, W0, W2, ArithOption(W2, ST_LSL, 3));
|
||||
}
|
||||
|
||||
RET(X30);
|
||||
JitRegister::Register(start, GetCodePtr(), "JIT_Mfcr");
|
||||
GetAsmRoutines()->mfcr = nullptr;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue