Merge pull request #9822 from JosJuice/jitarm64-ps-cmpxx

JitArm64: Implement ps_cmpXX
This commit is contained in:
Markus Wick 2021-07-10 19:20:48 +02:00 committed by GitHub
commit f6ca70d094
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 22 deletions

View File

@ -152,6 +152,7 @@ public:
void ps_sumX(UGeckoInstruction inst); void ps_sumX(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst);
void ps_rsqrte(UGeckoInstruction inst); void ps_rsqrte(UGeckoInstruction inst);
void ps_cmpXX(UGeckoInstruction inst);
// Loadstore paired // Loadstore paired
void psq_l(UGeckoInstruction inst); void psq_l(UGeckoInstruction inst);
@ -168,6 +169,8 @@ public:
Arm64Gen::ARM64Reg src_reg, Arm64Gen::ARM64Reg src_reg,
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG); Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void FloatCompare(UGeckoInstruction inst, bool upper = false);
bool IsFPRStoreSafe(size_t guest_reg) const; bool IsFPRStoreSafe(size_t guest_reg) const;
protected: protected:

View File

@ -363,11 +363,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
} }
} }
void JitArm64::fcmpX(UGeckoInstruction inst) void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
{ {
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF; const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
const u32 a = inst.FA; const u32 a = inst.FA;
@ -386,12 +383,15 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
const bool input_ftz_workaround = const bool input_ftz_workaround =
!cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]); !cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]);
const bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true) && !input_ftz_workaround; const bool singles = fpr.IsSingle(a, !upper) && fpr.IsSingle(b, !upper) && !input_ftz_workaround;
const RegType type = singles ? RegType::LowerPairSingle : RegType::LowerPair; const RegType lower_type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
const RegType upper_type = singles ? RegType::Single : RegType::Register;
const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble; const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble;
const ARM64Reg VA = reg_encoder(fpr.R(a, type)); const bool upper_a = upper && !js.op->fprIsDuplicated[a];
const ARM64Reg VB = reg_encoder(fpr.R(b, type)); const bool upper_b = upper && !js.op->fprIsDuplicated[b];
ARM64Reg VA = reg_encoder(fpr.R(a, upper_a ? upper_type : lower_type));
ARM64Reg VB = reg_encoder(fpr.R(b, upper_b ? upper_type : lower_type));
gpr.BindCRToRegister(crf, false); gpr.BindCRToRegister(crf, false);
const ARM64Reg XA = gpr.CR(crf); const ARM64Reg XA = gpr.CR(crf);
@ -404,12 +404,39 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK); ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK);
} }
ARM64Reg V0Q = ARM64Reg::INVALID_REG;
ARM64Reg V1Q = ARM64Reg::INVALID_REG;
if (upper_a)
{
V0Q = fpr.GetReg();
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V0Q), VA, 1);
VA = reg_encoder(V0Q);
}
if (upper_b)
{
if (a == b)
{
VB = VA;
}
else
{
V1Q = fpr.GetReg();
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V1Q), VB, 1);
VB = reg_encoder(V1Q);
}
}
m_float_emit.FCMP(VA, VB);
if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);
if (V1Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V1Q);
FixupBranch pNaN, pLesser, pGreater; FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3; FixupBranch continue1, continue2, continue3;
ORR(XA, ARM64Reg::ZR, 32, 0, true); ORR(XA, ARM64Reg::ZR, 32, 0, true);
m_float_emit.FCMP(VA, VB);
if (a != b) if (a != b)
{ {
// if B > A goto Greater's jump target // if B > A goto Greater's jump target
@ -465,6 +492,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
} }
} }
// JIT handler for fcmpX. The comparison itself is emitted by FloatCompare;
// this wrapper calls it without the second argument, so the declared default
// (upper = false) applies.
void JitArm64::fcmpX(UGeckoInstruction inst)
{
INSTRUCTION_START
// NOTE(review): presumably bails out of this handler when the floating-point
// JIT is disabled in the config - macro body is not visible here, confirm.
JITDISABLE(bJITFloatingPointOff);
FloatCompare(inst);
}
void JitArm64::fctiwzx(UGeckoInstruction inst) void JitArm64::fctiwzx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START

View File

@ -451,3 +451,12 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst)
SetFPRFIfNeeded(true, VD); SetFPRFIfNeeded(true, VD);
} }
// JIT handler shared by the four paired-single compares registered in table4:
// ps_cmpu0 (SUBOP10 0), ps_cmpo0 (32), ps_cmpu1 (64) and ps_cmpo1 (96).
// The comparison itself is emitted by FloatCompare.
void JitArm64::ps_cmpXX(UGeckoInstruction inst)
{
INSTRUCTION_START
// NOTE(review): presumably bails out of this handler when the paired-single
// JIT is disabled in the config - macro body is not visible here, confirm.
JITDISABLE(bJITPairedOff);
// Bit 64 of SUBOP10 is set only for the "1" variants (64 and 96), so it
// distinguishes them from the "0" variants (0 and 32).
const bool upper = inst.SUBOP10 & 64;
FloatCompare(inst, upper);
}

View File

@ -88,18 +88,18 @@ constexpr std::array<GekkoOPTemplate, 54> primarytable{{
constexpr std::array<GekkoOPTemplate, 13> table4{{ constexpr std::array<GekkoOPTemplate, 13> table4{{
// SUBOP10 // SUBOP10
{0, &JitArm64::FallBackToInterpreter}, // ps_cmpu0 {0, &JitArm64::ps_cmpXX}, // ps_cmpu0
{32, &JitArm64::FallBackToInterpreter}, // ps_cmpo0 {32, &JitArm64::ps_cmpXX}, // ps_cmpo0
{40, &JitArm64::fp_logic}, // ps_neg {40, &JitArm64::fp_logic}, // ps_neg
{136, &JitArm64::fp_logic}, // ps_nabs {136, &JitArm64::fp_logic}, // ps_nabs
{264, &JitArm64::fp_logic}, // ps_abs {264, &JitArm64::fp_logic}, // ps_abs
{64, &JitArm64::FallBackToInterpreter}, // ps_cmpu1 {64, &JitArm64::ps_cmpXX}, // ps_cmpu1
{72, &JitArm64::fp_logic}, // ps_mr {72, &JitArm64::fp_logic}, // ps_mr
{96, &JitArm64::FallBackToInterpreter}, // ps_cmpo1 {96, &JitArm64::ps_cmpXX}, // ps_cmpo1
{528, &JitArm64::ps_mergeXX}, // ps_merge00 {528, &JitArm64::ps_mergeXX}, // ps_merge00
{560, &JitArm64::ps_mergeXX}, // ps_merge01 {560, &JitArm64::ps_mergeXX}, // ps_merge01
{592, &JitArm64::ps_mergeXX}, // ps_merge10 {592, &JitArm64::ps_mergeXX}, // ps_merge10
{624, &JitArm64::ps_mergeXX}, // ps_merge11 {624, &JitArm64::ps_mergeXX}, // ps_merge11
{1014, &JitArm64::FallBackToInterpreter}, // dcbz_l {1014, &JitArm64::FallBackToInterpreter}, // dcbz_l
}}; }};