Merge pull request #9472 from JosJuice/jitarm64-unexpected-double

JitArm64: Update registers last used before start of instruction
This commit is contained in:
Léo Lam 2021-01-27 12:43:11 +01:00 committed by GitHub
commit 2d75b0d2bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 74 additions and 10 deletions

View File

@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
BitSet32 fpr_used = op.fregsIn;
if (op.fregOut >= 0)
fpr_used[op.fregOut] = true;
fpr.UpdateLastUsed(fpr_used);
// Gather pipe writes using a non-immediate address are discovered by profiling.
bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();

View File

@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
bool use_b = op5 != 25; // fmul uses no B
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
(!use_c || fpr.IsSingle(c, !packed));
const auto inputs_are_singles_func = [&] {
return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
(!use_c || fpr.IsSingle(c, !packed));
};
const bool inputs_are_singles = inputs_are_singles_func();
ARM64Reg VA{}, VB{}, VC{}, VD{};
@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
}
}
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of fp_arith");
if (single || packed)
fpr.FixSinglePrecision(d);
}
@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
break;
}
}
ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
"Register allocation turned singles into doubles in the middle of fp_logic");
}
void JitArm64::fselx(UGeckoInstruction inst)
@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
const u32 c = inst.FC;
const u32 d = inst.FD;
const bool a_single = fpr.IsSingle(a, true);
if (fpr.IsSingle(a, true))
{
const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
m_float_emit.FCMPE(EncodeRegToDouble(VA));
}
const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
const ARM64Reg VB = fpr.R(b, type);
const ARM64Reg VC = fpr.R(c, type);
const ARM64Reg VD = fpr.RW(d, type);
m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
ASSERT_MSG(DYNA_REC,
a_single == fpr.IsSingle(a, true) &&
b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
"Register allocation turned singles into doubles in the middle of fselx");
}
void JitArm64::frspx(UGeckoInstruction inst)
@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
const u32 b = inst.FB;
const u32 d = inst.FD;
if (fpr.IsSingle(b, true))
const bool single = fpr.IsSingle(b, true);
if (single)
{
// Source is already in single precision, so no need to do anything but to copy to PSR1.
const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
}
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of frspx");
}
void JitArm64::fcmpX(UGeckoInstruction inst)
@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
SetJumpTarget(continue3);
}
SetJumpTarget(continue1);
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
"Register allocation turned singles into doubles in the middle of fcmpX");
}
void JitArm64::fctiwzx(UGeckoInstruction inst)
@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
const bool single = fpr.IsSingle(b, true);
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
const ARM64Reg V0 = fpr.GetReg();
@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
}
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0);
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of fctiwzx");
}

View File

@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
break;
}
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
"Register allocation turned singles into doubles in the middle of ps_mergeXX");
}
void JitArm64::ps_mulsX(UGeckoInstruction inst)
@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_mulsX");
fpr.FixSinglePrecision(d);
}
@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
break;
}
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_maddXX");
fpr.FixSinglePrecision(d);
if (V0Q != INVALID_REG)
@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
m_float_emit.MOV(VD, V0);
fpr.Unlock(V0Q);
}
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sel");
}
void JitArm64::ps_sumX(UGeckoInstruction inst)
@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
}
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sumX");
fpr.FixSinglePrecision(d);
fpr.Unlock(V0);

View File

@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg()
// Holy cow, how did you run out of registers?
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
// happen
WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb");
ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
return INVALID_REG;
}
// Ages the cached guest registers: every register whose bit is set in regs_used has its
// last-used counter reset, and every other guest register has its counter incremented.
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
  const size_t guest_count = m_guest_registers.size();
  for (size_t index = 0; index != guest_count; ++index)
  {
    // Only indices below 32 are looked up in the bitset; any guest register at
    // index 32 or above is always treated as "not used".
    const bool used_now = index < 32 && regs_used[index];
    if (!used_now)
    {
      m_guest_registers[index].IncrementLastUsed();
      continue;
    }
    m_guest_registers[index].ResetLastUsed();
  }
}
u32 Arm64RegCache::GetUnlockedRegisterCount() const
{
u32 unlocked_registers = 0;

View File

@ -140,6 +140,8 @@ public:
// Requires unlocking after done
Arm64Gen::ARM64Reg GetReg();
void UpdateLastUsed(BitSet32 regs_used);
// Locks a register so a cache cannot use it
// Useful for function calls
template <typename T = Arm64Gen::ARM64Reg, typename... Args>
@ -281,9 +283,9 @@ public:
// Returns a guest register inside of a host register
// Will dump an immediate to the host register as well
Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair);
Arm64Gen::ARM64Reg R(size_t preg, RegType type);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
BitSet32 GetCallerSavedUsed() const override;