Merge pull request #9472 from JosJuice/jitarm64-unexpected-double

JitArm64: Update registers last used before start of instruction
This commit is contained in:
Léo Lam 2021-01-27 12:43:11 +01:00 committed by GitHub
commit 2d75b0d2bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 74 additions and 10 deletions

View File

@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (!SConfig::GetInstance().bEnableDebugging) if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC); js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
BitSet32 fpr_used = op.fregsIn;
if (op.fregOut >= 0)
fpr_used[op.fregOut] = true;
fpr.UpdateLastUsed(fpr_used);
// Gather pipe writes using a non-immediate address are discovered by profiling. // Gather pipe writes using a non-immediate address are discovered by profiling.
bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end(); bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();

View File

@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
bool use_b = op5 != 25; // fmul uses no B bool use_b = op5 != 25; // fmul uses no B
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && const auto inputs_are_singles_func = [&] {
(!use_c || fpr.IsSingle(c, !packed)); return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
(!use_c || fpr.IsSingle(c, !packed));
};
const bool inputs_are_singles = inputs_are_singles_func();
ARM64Reg VA{}, VB{}, VC{}, VD{}; ARM64Reg VA{}, VB{}, VC{}, VD{};
@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
} }
} }
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of fp_arith");
if (single || packed) if (single || packed)
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
} }
@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
break; break;
} }
} }
ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
"Register allocation turned singles into doubles in the middle of fp_logic");
} }
void JitArm64::fselx(UGeckoInstruction inst) void JitArm64::fselx(UGeckoInstruction inst)
@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
const u32 c = inst.FC; const u32 c = inst.FC;
const u32 d = inst.FD; const u32 d = inst.FD;
const bool a_single = fpr.IsSingle(a, true);
if (fpr.IsSingle(a, true)) if (fpr.IsSingle(a, true))
{ {
const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle); const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
m_float_emit.FCMPE(EncodeRegToDouble(VA)); m_float_emit.FCMPE(EncodeRegToDouble(VA));
} }
const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true); const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair; const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble; const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
const ARM64Reg VB = fpr.R(b, type); const ARM64Reg VB = fpr.R(b, type);
const ARM64Reg VC = fpr.R(c, type); const ARM64Reg VC = fpr.R(c, type);
const ARM64Reg VD = fpr.RW(d, type); const ARM64Reg VD = fpr.RW(d, type);
m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE); m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
ASSERT_MSG(DYNA_REC,
a_single == fpr.IsSingle(a, true) &&
b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
"Register allocation turned singles into doubles in the middle of fselx");
} }
void JitArm64::frspx(UGeckoInstruction inst) void JitArm64::frspx(UGeckoInstruction inst)
@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
const u32 b = inst.FB; const u32 b = inst.FB;
const u32 d = inst.FD; const u32 d = inst.FD;
if (fpr.IsSingle(b, true)) const bool single = fpr.IsSingle(b, true);
if (single)
{ {
// Source is already in single precision, so no need to do anything but to copy to PSR1. // Source is already in single precision, so no need to do anything but to copy to PSR1.
const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle); const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
} }
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of frspx");
} }
void JitArm64::fcmpX(UGeckoInstruction inst) void JitArm64::fcmpX(UGeckoInstruction inst)
@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
SetJumpTarget(continue3); SetJumpTarget(continue3);
} }
SetJumpTarget(continue1); SetJumpTarget(continue1);
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
"Register allocation turned singles into doubles in the middle of fcmpX");
} }
void JitArm64::fctiwzx(UGeckoInstruction inst) void JitArm64::fctiwzx(UGeckoInstruction inst)
@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
const bool single = fpr.IsSingle(b, true); const bool single = fpr.IsSingle(b, true);
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair); const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d); const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
const ARM64Reg V0 = fpr.GetReg(); const ARM64Reg V0 = fpr.GetReg();
@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
} }
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0); fpr.Unlock(V0);
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of fctiwzx");
} }

View File

@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op"); ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
break; break;
} }
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
"Register allocation turned singles into doubles in the middle of ps_mergeXX");
} }
void JitArm64::ps_mulsX(UGeckoInstruction inst) void JitArm64::ps_mulsX(UGeckoInstruction inst)
@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0); m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_mulsX");
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
} }
@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op"); ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
break; break;
} }
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_maddXX");
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
if (V0Q != INVALID_REG) if (V0Q != INVALID_REG)
@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
m_float_emit.MOV(VD, V0); m_float_emit.MOV(VD, V0);
fpr.Unlock(V0Q); fpr.Unlock(V0Q);
} }
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sel");
} }
void JitArm64::ps_sumX(UGeckoInstruction inst) void JitArm64::ps_sumX(UGeckoInstruction inst)
@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0); m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
} }
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sumX");
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0);

View File

@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg()
// Holy cow, how did you run out of registers? // Holy cow, how did you run out of registers?
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure // We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
// happen // happen
WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb"); ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
return INVALID_REG; return INVALID_REG;
} }
// Walks every cached guest register and refreshes its last-used bookkeeping.
//
// regs_used: bit N set means guest register N is touched by the instruction
//            about to be compiled.
//
// A register whose bit is set has its counter reset; every other register has
// its counter incremented. Only indices 0-31 are looked up in the bitset, so
// any guest register at index 32 or above is always incremented.
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
  for (size_t idx = 0; idx < m_guest_registers.size(); ++idx)
  {
    OpArg& guest_reg = m_guest_registers[idx];

    // The bitset is only consulted for indices it can represent.
    const bool touched = idx < 32 && regs_used[idx];
    if (!touched)
    {
      guest_reg.IncrementLastUsed();
      continue;
    }

    guest_reg.ResetLastUsed();
  }
}
u32 Arm64RegCache::GetUnlockedRegisterCount() const u32 Arm64RegCache::GetUnlockedRegisterCount() const
{ {
u32 unlocked_registers = 0; u32 unlocked_registers = 0;

View File

@ -140,6 +140,8 @@ public:
// Requires unlocking after done // Requires unlocking after done
Arm64Gen::ARM64Reg GetReg(); Arm64Gen::ARM64Reg GetReg();
void UpdateLastUsed(BitSet32 regs_used);
// Locks a register so a cache cannot use it // Locks a register so a cache cannot use it
// Useful for function calls // Useful for function calls
template <typename T = Arm64Gen::ARM64Reg, typename... Args> template <typename T = Arm64Gen::ARM64Reg, typename... Args>
@ -281,9 +283,9 @@ public:
// Returns a guest register inside of a host register // Returns a guest register inside of a host register
// Will dump an immediate to the host register as well // Will dump an immediate to the host register as well
Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair); Arm64Gen::ARM64Reg R(size_t preg, RegType type);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair); Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
BitSet32 GetCallerSavedUsed() const override; BitSet32 GetCallerSavedUsed() const override;