Merge pull request #9472 from JosJuice/jitarm64-unexpected-double
JitArm64: Update registers last used before start of instruction
This commit is contained in:
commit
2d75b0d2bc
|
@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
if (!SConfig::GetInstance().bEnableDebugging)
|
||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
|
||||
|
||||
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
|
||||
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
|
||||
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
|
||||
|
||||
BitSet32 fpr_used = op.fregsIn;
|
||||
if (op.fregOut >= 0)
|
||||
fpr_used[op.fregOut] = true;
|
||||
fpr.UpdateLastUsed(fpr_used);
|
||||
|
||||
// Gather pipe writes using a non-immediate address are discovered by profiling.
|
||||
bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();
|
||||
|
||||
|
|
|
@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
|||
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
|
||||
bool use_b = op5 != 25; // fmul uses no B
|
||||
|
||||
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
|
||||
(!use_c || fpr.IsSingle(c, !packed));
|
||||
const auto inputs_are_singles_func = [&] {
|
||||
return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
|
||||
(!use_c || fpr.IsSingle(c, !packed));
|
||||
};
|
||||
const bool inputs_are_singles = inputs_are_singles_func();
|
||||
|
||||
ARM64Reg VA{}, VB{}, VC{}, VD{};
|
||||
|
||||
|
@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
|||
}
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
|
||||
"Register allocation turned singles into doubles in the middle of fp_arith");
|
||||
|
||||
if (single || packed)
|
||||
fpr.FixSinglePrecision(d);
|
||||
}
|
||||
|
@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
|
||||
"Register allocation turned singles into doubles in the middle of fp_logic");
|
||||
}
|
||||
|
||||
void JitArm64::fselx(UGeckoInstruction inst)
|
||||
|
@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
|
|||
const u32 c = inst.FC;
|
||||
const u32 d = inst.FD;
|
||||
|
||||
const bool a_single = fpr.IsSingle(a, true);
|
||||
if (fpr.IsSingle(a, true))
|
||||
{
|
||||
const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
|
||||
|
@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
|
|||
m_float_emit.FCMPE(EncodeRegToDouble(VA));
|
||||
}
|
||||
|
||||
const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
|
||||
const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||
const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
|
||||
const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||
const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, type);
|
||||
const ARM64Reg VC = fpr.R(c, type);
|
||||
const ARM64Reg VD = fpr.RW(d, type);
|
||||
|
||||
m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
|
||||
|
||||
ASSERT_MSG(DYNA_REC,
|
||||
a_single == fpr.IsSingle(a, true) &&
|
||||
b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
|
||||
"Register allocation turned singles into doubles in the middle of fselx");
|
||||
}
|
||||
|
||||
void JitArm64::frspx(UGeckoInstruction inst)
|
||||
|
@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
|||
const u32 b = inst.FB;
|
||||
const u32 d = inst.FD;
|
||||
|
||||
if (fpr.IsSingle(b, true))
|
||||
const bool single = fpr.IsSingle(b, true);
|
||||
if (single)
|
||||
{
|
||||
// Source is already in single precision, so no need to do anything but to copy to PSR1.
|
||||
const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
|
||||
|
@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
|||
|
||||
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
|
||||
"Register allocation turned singles into doubles in the middle of frspx");
|
||||
}
|
||||
|
||||
void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||
|
@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
|||
SetJumpTarget(continue3);
|
||||
}
|
||||
SetJumpTarget(continue1);
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
|
||||
"Register allocation turned singles into doubles in the middle of fcmpX");
|
||||
}
|
||||
|
||||
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||
|
@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||
const bool single = fpr.IsSingle(b, true);
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
|
||||
const ARM64Reg VD = fpr.RW(d);
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
|
||||
|
||||
const ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
|
@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||
}
|
||||
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
||||
fpr.Unlock(V0);
|
||||
|
||||
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
|
||||
"Register allocation turned singles into doubles in the middle of fctiwzx");
|
||||
}
|
||||
|
|
|
@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
|
|||
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
|
||||
"Register allocation turned singles into doubles in the middle of ps_mergeXX");
|
||||
}
|
||||
|
||||
void JitArm64::ps_mulsX(UGeckoInstruction inst)
|
||||
|
@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
|
|||
|
||||
m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
|
||||
"Register allocation turned singles into doubles in the middle of ps_mulsX");
|
||||
|
||||
fpr.FixSinglePrecision(d);
|
||||
}
|
||||
|
||||
|
@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
|
|||
ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
||||
"Register allocation turned singles into doubles in the middle of ps_maddXX");
|
||||
|
||||
fpr.FixSinglePrecision(d);
|
||||
|
||||
if (V0Q != INVALID_REG)
|
||||
|
@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
|
|||
m_float_emit.MOV(VD, V0);
|
||||
fpr.Unlock(V0Q);
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
||||
"Register allocation turned singles into doubles in the middle of ps_sel");
|
||||
}
|
||||
|
||||
void JitArm64::ps_sumX(UGeckoInstruction inst)
|
||||
|
@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
|
|||
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
||||
"Register allocation turned singles into doubles in the middle of ps_sumX");
|
||||
|
||||
fpr.FixSinglePrecision(d);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
|
|
|
@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg()
|
|||
// Holy cow, how did you run out of registers?
|
||||
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
|
||||
// happen
|
||||
WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb");
|
||||
ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
|
||||
return INVALID_REG;
|
||||
}
|
||||
|
||||
// Refreshes the last-used bookkeeping of every entry in m_guest_registers:
// entries whose bit is set in regs_used get ResetLastUsed(), and every other
// entry gets IncrementLastUsed().
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
|
||||
{
|
||||
for (size_t i = 0; i < m_guest_registers.size(); ++i)
|
||||
{
|
||||
OpArg& reg = m_guest_registers[i];
|
||||
// Entries at index 32 or above are never looked up in regs_used, so they
// always take the increment branch.
if (i < 32 && regs_used[i])
|
||||
reg.ResetLastUsed();
|
||||
else
|
||||
reg.IncrementLastUsed();
|
||||
}
|
||||
}
|
||||
|
||||
u32 Arm64RegCache::GetUnlockedRegisterCount() const
|
||||
{
|
||||
u32 unlocked_registers = 0;
|
||||
|
|
|
@ -140,6 +140,8 @@ public:
|
|||
// Requires unlocking after done
|
||||
Arm64Gen::ARM64Reg GetReg();
|
||||
|
||||
// Calls ResetLastUsed() on each guest register selected by regs_used
// and IncrementLastUsed() on every other guest register
void UpdateLastUsed(BitSet32 regs_used);
|
||||
|
||||
// Locks a register so a cache cannot use it
|
||||
// Useful for function calls
|
||||
template <typename T = Arm64Gen::ARM64Reg, typename... Args>
|
||||
|
@ -281,9 +283,9 @@ public:
|
|||
|
||||
// Returns a guest register inside of a host register
|
||||
// Will dump an immediate to the host register as well
|
||||
Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair);
|
||||
Arm64Gen::ARM64Reg R(size_t preg, RegType type);
|
||||
|
||||
Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair);
|
||||
Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
|
||||
|
||||
BitSet32 GetCallerSavedUsed() const override;
|
||||
|
||||
|
|
Loading…
Reference in New Issue