JIT: add ps_cmp0/ps_cmp1 implementations using current fcmp code

2014-09-18 03:57:24 -07:00 · 2014-09-18 03:57:24 -07:00 · 1b425dedd1
parent 3c49200b22
commit 1b425dedd1
4 changed files with 45 additions and 18 deletions
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@ -142,6 +142,7 @@ public:
 	typedef u32 (*Operation)(u32 a, u32 b);
 	void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
 	void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
+	void FloatCompare(UGeckoInstruction inst, bool upper = false);  // Compare emitter shared by fcmpx and ps_cmpXX; upper=true compares the high halves of FA/FB (moved down with MOVHLPS) instead of using the registers directly.

 	// OPCODES
 	void unknown_instruction(UGeckoInstruction _inst);
@ -199,6 +200,7 @@ public:
 	void ps_maddXX(UGeckoInstruction inst);
 	void ps_sum(UGeckoInstruction inst);
 	void ps_muls(UGeckoInstruction inst);
+	void ps_cmpXX(UGeckoInstruction inst);  // Handles the table4 entries ps_cmpu0, ps_cmpo0, ps_cmpu1 and ps_cmpo1 (SUBOP10 0, 32, 64, 96).

 	void fp_arith(UGeckoInstruction inst);

--- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
@ -110,14 +110,14 @@ static GekkoOPTemplate primarytable[] =

 static GekkoOPTemplate table4[] =
 {    //SUBOP10
-	{0,    &Jit64::FallBackToInterpreter}, //"ps_cmpu0",   OPTYPE_PS, FL_SET_CRn}},
-	{32,   &Jit64::FallBackToInterpreter}, //"ps_cmpo0",   OPTYPE_PS, FL_SET_CRn}},
+	{0,    &Jit64::ps_cmpXX},              //"ps_cmpu0",   OPTYPE_PS, FL_SET_CRn}},
+	{32,   &Jit64::ps_cmpXX},              //"ps_cmpo0",   OPTYPE_PS, FL_SET_CRn}},
 	{40,   &Jit64::ps_sign},               //"ps_neg",     OPTYPE_PS, FL_RC_BIT}},
 	{136,  &Jit64::ps_sign},               //"ps_nabs",    OPTYPE_PS, FL_RC_BIT}},
 	{264,  &Jit64::ps_sign},               //"ps_abs",     OPTYPE_PS, FL_RC_BIT}},
-	{64,   &Jit64::FallBackToInterpreter}, //"ps_cmpu1",   OPTYPE_PS, FL_RC_BIT}},
+	{64,   &Jit64::ps_cmpXX},              //"ps_cmpu1",   OPTYPE_PS, FL_RC_BIT}},
 	{72,   &Jit64::ps_mr},                 //"ps_mr",      OPTYPE_PS, FL_RC_BIT}},
-	{96,   &Jit64::FallBackToInterpreter}, //"ps_cmpo1",   OPTYPE_PS, FL_RC_BIT}},
+	{96,   &Jit64::ps_cmpXX},              //"ps_cmpo1",   OPTYPE_PS, FL_RC_BIT}},
 	{528,  &Jit64::ps_mergeXX},            //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
 	{560,  &Jit64::ps_mergeXX},            //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
 	{592,  &Jit64::ps_mergeXX},            //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@ -267,25 +267,32 @@ void Jit64::fmrx(UGeckoInstruction inst)
 	fpr.UnlockAll();
 }

-void Jit64::fcmpx(UGeckoInstruction inst)
+void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
 {
-	INSTRUCTION_START
-	JITDISABLE(bJITFloatingPointOff);
-	FALLBACK_IF(jo.fpAccurateFcmp);
-
-	//bool ordered = inst.SUBOP10 == 32;
-	int a   = inst.FA;
-	int b   = inst.FB;
-	int crf = inst.CRFD;
 	bool fprf = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableFPRF && js.op->wantsFPRF;
+	//bool ordered = !!(inst.SUBOP10 & 32);
+	int a = inst.FA;
+	int b = inst.FB;
+	int crf = inst.CRFD;

-	fpr.Lock(a,b);
-	fpr.BindToRegister(b, true);
+	fpr.Lock(a, b);
+	fpr.BindToRegister(b, true, false);

 	if (fprf)
 		AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
-	// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
-	UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
+
+	if (upper)
+	{
+		fpr.BindToRegister(a, true, false);
+		MOVHLPS(XMM0, fpr.RX(a));
+		MOVHLPS(XMM1, fpr.RX(b));
+		UCOMISD(XMM1, R(XMM0));
+	}
+	else
+	{
+		// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
+		UCOMISD(fpr.RX(b), fpr.R(a));
+	}

 	FixupBranch pNaN, pLesser, pGreater;
 	FixupBranch continue1, continue2, continue3;
@ -293,7 +300,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 	if (a != b)
 	{
 		// if B > A, goto Lesser's jump target
-		pLesser  = J_CC(CC_A);
+		pLesser = J_CC(CC_A);
 	}

 	// if (B != B) or (A != A), goto NaN's jump target
@ -344,6 +351,15 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 	fpr.UnlockAll();
 }

+void Jit64::fcmpx(UGeckoInstruction inst)  // JIT entry point for fcmpx; the comparison code itself is emitted by the shared FloatCompare helper.
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);  // NOTE(review): presumably hands the instruction to the interpreter when the floating-point JIT is switched off - confirm against the macro definition.
+	FALLBACK_IF(jo.fpAccurateFcmp);  // NOTE(review): presumably falls back to the interpreter when accurate fcmp is requested - confirm against the macro definition.
+
+	FloatCompare(inst);  // 'upper' keeps its default (false), so the helper issues UCOMISD on FB/FA directly without the MOVHLPS step.
+}
+
 void Jit64::fctiwx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@ -351,3 +351,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
 	MOVAPD(fpr.RX(d), R(XMM0));
 	fpr.UnlockAll();
 }
+
+void Jit64::ps_cmpXX(UGeckoInstruction inst)  // JIT entry point registered in table4 for ps_cmpu0 (0), ps_cmpo0 (32), ps_cmpu1 (64) and ps_cmpo1 (96).
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);  // NOTE(review): presumably hands the instruction to the interpreter when the floating-point JIT is switched off - confirm against the macro definition.
+	FALLBACK_IF(jo.fpAccurateFcmp);  // NOTE(review): presumably falls back to the interpreter when accurate fcmp is requested - confirm against the macro definition.
+
+	FloatCompare(inst, !!(inst.SUBOP10 & 64));  // Bit 64 is set only for the ps_cmpu1/ps_cmpo1 subops (64, 96), selecting the upper-half compare; bit 32 (the "o" variants) is not forwarded to the helper.
+}