Merge pull request #9822 from JosJuice/jitarm64-ps-cmpxx

JitArm64: Implement ps_cmpXX
This commit is contained in:
Markus Wick 2021-07-10 19:20:48 +02:00 committed by GitHub
commit f6ca70d094
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 22 deletions

View File

@ -152,6 +152,7 @@ public:
void ps_sumX(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);
void ps_rsqrte(UGeckoInstruction inst);
void ps_cmpXX(UGeckoInstruction inst);
// Loadstore paired
void psq_l(UGeckoInstruction inst);
@ -168,6 +169,8 @@ public:
Arm64Gen::ARM64Reg src_reg,
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
// Compare helper shared by fcmpX (which relies on the default, upper = false) and ps_cmpXX
// (which passes upper = true when bit 64 of SUBOP10 is set).
void FloatCompare(UGeckoInstruction inst, bool upper = false);
bool IsFPRStoreSafe(size_t guest_reg) const;
protected:

View File

@ -363,11 +363,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
}
}
void JitArm64::fcmpX(UGeckoInstruction inst)
void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
const u32 a = inst.FA;
@ -386,12 +383,15 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
const bool input_ftz_workaround =
!cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]);
const bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true) && !input_ftz_workaround;
const RegType type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
const bool singles = fpr.IsSingle(a, !upper) && fpr.IsSingle(b, !upper) && !input_ftz_workaround;
const RegType lower_type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
const RegType upper_type = singles ? RegType::Single : RegType::Register;
const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble;
const ARM64Reg VA = reg_encoder(fpr.R(a, type));
const ARM64Reg VB = reg_encoder(fpr.R(b, type));
const bool upper_a = upper && !js.op->fprIsDuplicated[a];
const bool upper_b = upper && !js.op->fprIsDuplicated[b];
ARM64Reg VA = reg_encoder(fpr.R(a, upper_a ? upper_type : lower_type));
ARM64Reg VB = reg_encoder(fpr.R(b, upper_b ? upper_type : lower_type));
gpr.BindCRToRegister(crf, false);
const ARM64Reg XA = gpr.CR(crf);
@ -404,12 +404,39 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK);
}
ARM64Reg V0Q = ARM64Reg::INVALID_REG;
ARM64Reg V1Q = ARM64Reg::INVALID_REG;
if (upper_a)
{
V0Q = fpr.GetReg();
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V0Q), VA, 1);
VA = reg_encoder(V0Q);
}
if (upper_b)
{
if (a == b)
{
VB = VA;
}
else
{
V1Q = fpr.GetReg();
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V1Q), VB, 1);
VB = reg_encoder(V1Q);
}
}
m_float_emit.FCMP(VA, VB);
if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);
if (V1Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V1Q);
FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3;
ORR(XA, ARM64Reg::ZR, 32, 0, true);
m_float_emit.FCMP(VA, VB);
if (a != b)
{
// if B > A goto Greater's jump target
@ -465,6 +492,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
}
}
// Handler for fcmpX: after the instruction-start and JIT-disable checks, the whole compare is
// delegated to FloatCompare.
void JitArm64::fcmpX(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITFloatingPointOff);

  // `false` is the declared default for `upper`; it is spelled out here for readability.
  FloatCompare(inst, false);
}
void JitArm64::fctiwzx(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -451,3 +451,12 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst)
SetFPRFIfNeeded(true, VD);
}
// Handler for the ps_cmp* opcodes: the compare itself is delegated to FloatCompare.
// Bit 64 of SUBOP10 selects the `upper` argument; of the opcodes routed here it is set for
// SUBOP10 64 (ps_cmpu1) and 96 (ps_cmpo1), and clear for 0 (ps_cmpu0) and 32 (ps_cmpo0).
void JitArm64::ps_cmpXX(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITPairedOff);

  FloatCompare(inst, (inst.SUBOP10 & 64) != 0);
}

View File

@ -88,18 +88,18 @@ constexpr std::array<GekkoOPTemplate, 54> primarytable{{
constexpr std::array<GekkoOPTemplate, 13> table4{{
// SUBOP10
{0, &JitArm64::FallBackToInterpreter}, // ps_cmpu0
{32, &JitArm64::FallBackToInterpreter}, // ps_cmpo0
{40, &JitArm64::fp_logic}, // ps_neg
{136, &JitArm64::fp_logic}, // ps_nabs
{264, &JitArm64::fp_logic}, // ps_abs
{64, &JitArm64::FallBackToInterpreter}, // ps_cmpu1
{72, &JitArm64::fp_logic}, // ps_mr
{96, &JitArm64::FallBackToInterpreter}, // ps_cmpo1
{528, &JitArm64::ps_mergeXX}, // ps_merge00
{560, &JitArm64::ps_mergeXX}, // ps_merge01
{592, &JitArm64::ps_mergeXX}, // ps_merge10
{624, &JitArm64::ps_mergeXX}, // ps_merge11
{0, &JitArm64::ps_cmpXX}, // ps_cmpu0
{32, &JitArm64::ps_cmpXX}, // ps_cmpo0
{40, &JitArm64::fp_logic}, // ps_neg
{136, &JitArm64::fp_logic}, // ps_nabs
{264, &JitArm64::fp_logic}, // ps_abs
{64, &JitArm64::ps_cmpXX}, // ps_cmpu1
{72, &JitArm64::fp_logic}, // ps_mr
{96, &JitArm64::ps_cmpXX}, // ps_cmpo1
{528, &JitArm64::ps_mergeXX}, // ps_merge00
{560, &JitArm64::ps_mergeXX}, // ps_merge01
{592, &JitArm64::ps_mergeXX}, // ps_merge10
{624, &JitArm64::ps_mergeXX}, // ps_merge11
{1014, &JitArm64::FallBackToInterpreter}, // dcbz_l
}};