Merge pull request #9822 from JosJuice/jitarm64-ps-cmpxx
JitArm64: Implement ps_cmpXX
This commit is contained in:
commit
f6ca70d094
|
@ -152,6 +152,7 @@ public:
|
||||||
void ps_sumX(UGeckoInstruction inst);
|
void ps_sumX(UGeckoInstruction inst);
|
||||||
void ps_res(UGeckoInstruction inst);
|
void ps_res(UGeckoInstruction inst);
|
||||||
void ps_rsqrte(UGeckoInstruction inst);
|
void ps_rsqrte(UGeckoInstruction inst);
|
||||||
|
void ps_cmpXX(UGeckoInstruction inst);
|
||||||
|
|
||||||
// Loadstore paired
|
// Loadstore paired
|
||||||
void psq_l(UGeckoInstruction inst);
|
void psq_l(UGeckoInstruction inst);
|
||||||
|
@ -168,6 +169,8 @@ public:
|
||||||
Arm64Gen::ARM64Reg src_reg,
|
Arm64Gen::ARM64Reg src_reg,
|
||||||
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
|
void FloatCompare(UGeckoInstruction inst, bool upper = false);
|
||||||
|
|
||||||
bool IsFPRStoreSafe(size_t guest_reg) const;
|
bool IsFPRStoreSafe(size_t guest_reg) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -363,11 +363,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::fcmpX(UGeckoInstruction inst)
|
void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
|
||||||
|
|
||||||
const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
|
const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
|
||||||
|
|
||||||
const u32 a = inst.FA;
|
const u32 a = inst.FA;
|
||||||
|
@ -386,12 +383,15 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
const bool input_ftz_workaround =
|
const bool input_ftz_workaround =
|
||||||
!cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]);
|
!cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]);
|
||||||
|
|
||||||
const bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true) && !input_ftz_workaround;
|
const bool singles = fpr.IsSingle(a, !upper) && fpr.IsSingle(b, !upper) && !input_ftz_workaround;
|
||||||
const RegType type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
|
const RegType lower_type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||||
|
const RegType upper_type = singles ? RegType::Single : RegType::Register;
|
||||||
const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble;
|
const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble;
|
||||||
|
|
||||||
const ARM64Reg VA = reg_encoder(fpr.R(a, type));
|
const bool upper_a = upper && !js.op->fprIsDuplicated[a];
|
||||||
const ARM64Reg VB = reg_encoder(fpr.R(b, type));
|
const bool upper_b = upper && !js.op->fprIsDuplicated[b];
|
||||||
|
ARM64Reg VA = reg_encoder(fpr.R(a, upper_a ? upper_type : lower_type));
|
||||||
|
ARM64Reg VB = reg_encoder(fpr.R(b, upper_b ? upper_type : lower_type));
|
||||||
|
|
||||||
gpr.BindCRToRegister(crf, false);
|
gpr.BindCRToRegister(crf, false);
|
||||||
const ARM64Reg XA = gpr.CR(crf);
|
const ARM64Reg XA = gpr.CR(crf);
|
||||||
|
@ -404,12 +404,39 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK);
|
ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ARM64Reg V0Q = ARM64Reg::INVALID_REG;
|
||||||
|
ARM64Reg V1Q = ARM64Reg::INVALID_REG;
|
||||||
|
if (upper_a)
|
||||||
|
{
|
||||||
|
V0Q = fpr.GetReg();
|
||||||
|
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V0Q), VA, 1);
|
||||||
|
VA = reg_encoder(V0Q);
|
||||||
|
}
|
||||||
|
if (upper_b)
|
||||||
|
{
|
||||||
|
if (a == b)
|
||||||
|
{
|
||||||
|
VB = VA;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
V1Q = fpr.GetReg();
|
||||||
|
m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V1Q), VB, 1);
|
||||||
|
VB = reg_encoder(V1Q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_float_emit.FCMP(VA, VB);
|
||||||
|
|
||||||
|
if (V0Q != ARM64Reg::INVALID_REG)
|
||||||
|
fpr.Unlock(V0Q);
|
||||||
|
if (V1Q != ARM64Reg::INVALID_REG)
|
||||||
|
fpr.Unlock(V1Q);
|
||||||
|
|
||||||
FixupBranch pNaN, pLesser, pGreater;
|
FixupBranch pNaN, pLesser, pGreater;
|
||||||
FixupBranch continue1, continue2, continue3;
|
FixupBranch continue1, continue2, continue3;
|
||||||
ORR(XA, ARM64Reg::ZR, 32, 0, true);
|
ORR(XA, ARM64Reg::ZR, 32, 0, true);
|
||||||
|
|
||||||
m_float_emit.FCMP(VA, VB);
|
|
||||||
|
|
||||||
if (a != b)
|
if (a != b)
|
||||||
{
|
{
|
||||||
// if B > A goto Greater's jump target
|
// if B > A goto Greater's jump target
|
||||||
|
@ -465,6 +492,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
|
|
||||||
|
FloatCompare(inst);
|
||||||
|
}
|
||||||
|
|
||||||
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -451,3 +451,12 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst)
|
||||||
|
|
||||||
SetFPRFIfNeeded(true, VD);
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitArm64::ps_cmpXX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITPairedOff);
|
||||||
|
|
||||||
|
const bool upper = inst.SUBOP10 & 64;
|
||||||
|
FloatCompare(inst, upper);
|
||||||
|
}
|
||||||
|
|
|
@ -88,18 +88,18 @@ constexpr std::array<GekkoOPTemplate, 54> primarytable{{
|
||||||
|
|
||||||
constexpr std::array<GekkoOPTemplate, 13> table4{{
|
constexpr std::array<GekkoOPTemplate, 13> table4{{
|
||||||
// SUBOP10
|
// SUBOP10
|
||||||
{0, &JitArm64::FallBackToInterpreter}, // ps_cmpu0
|
{0, &JitArm64::ps_cmpXX}, // ps_cmpu0
|
||||||
{32, &JitArm64::FallBackToInterpreter}, // ps_cmpo0
|
{32, &JitArm64::ps_cmpXX}, // ps_cmpo0
|
||||||
{40, &JitArm64::fp_logic}, // ps_neg
|
{40, &JitArm64::fp_logic}, // ps_neg
|
||||||
{136, &JitArm64::fp_logic}, // ps_nabs
|
{136, &JitArm64::fp_logic}, // ps_nabs
|
||||||
{264, &JitArm64::fp_logic}, // ps_abs
|
{264, &JitArm64::fp_logic}, // ps_abs
|
||||||
{64, &JitArm64::FallBackToInterpreter}, // ps_cmpu1
|
{64, &JitArm64::ps_cmpXX}, // ps_cmpu1
|
||||||
{72, &JitArm64::fp_logic}, // ps_mr
|
{72, &JitArm64::fp_logic}, // ps_mr
|
||||||
{96, &JitArm64::FallBackToInterpreter}, // ps_cmpo1
|
{96, &JitArm64::ps_cmpXX}, // ps_cmpo1
|
||||||
{528, &JitArm64::ps_mergeXX}, // ps_merge00
|
{528, &JitArm64::ps_mergeXX}, // ps_merge00
|
||||||
{560, &JitArm64::ps_mergeXX}, // ps_merge01
|
{560, &JitArm64::ps_mergeXX}, // ps_merge01
|
||||||
{592, &JitArm64::ps_mergeXX}, // ps_merge10
|
{592, &JitArm64::ps_mergeXX}, // ps_merge10
|
||||||
{624, &JitArm64::ps_mergeXX}, // ps_merge11
|
{624, &JitArm64::ps_mergeXX}, // ps_merge11
|
||||||
|
|
||||||
{1014, &JitArm64::FallBackToInterpreter}, // dcbz_l
|
{1014, &JitArm64::FallBackToInterpreter}, // dcbz_l
|
||||||
}};
|
}};
|
||||||
|
|
Loading…
Reference in New Issue