JitArm64: Lock fewer registers when assumeNoPairedQuantize

This commit is contained in:
JosJuice 2021-07-10 13:21:27 +02:00
parent 96760093e9
commit 93e968208e
1 changed files with 34 additions and 17 deletions

View File

@ -36,8 +36,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I; const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W; const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2);
fpr.Lock(ARM64Reg::Q1);
}
constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1; constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@ -73,8 +78,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries // Wipe the registers we are using as temporaries
gprs_in_use &= BitSet32(~7); gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use &= BitSet32(~3); fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
fprs_in_use[DecodeReg(VS)] = 0; fprs_in_use[DecodeReg(VS)] = 0;
u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
@ -103,8 +108,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0); m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q1);
}
} }
void JitArm64::psq_stXX(UGeckoInstruction inst) void JitArm64::psq_stXX(UGeckoInstruction inst)
@ -127,6 +137,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I; const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W; const int w = indexed ? inst.Wx : inst.W;
if (!js.assumeNoPairedQuantize)
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const bool have_single = fpr.IsSingle(inst.RS); const bool have_single = fpr.IsSingle(inst.RS);
@ -162,7 +173,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
} }
} }
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
gpr.Lock(ARM64Reg::W2);
constexpr ARM64Reg scale_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@ -191,15 +204,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg); MOV(gpr.R(inst.RA), addr_reg);
} }
if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries // Wipe the registers we are using as temporaries
gprs_in_use &= BitSet32(~7); gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use &= BitSet32(~3); gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (js.assumeNoPairedQuantize)
{
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w) if (!w)
flags |= BackPatchInfo::FLAG_PAIR; flags |= BackPatchInfo::FLAG_PAIR;
@ -221,6 +234,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize && !have_single) if (js.assumeNoPairedQuantize && !have_single)
fpr.Unlock(VS); fpr.Unlock(VS);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
} }