From 6df48ed432921116841273225173319c901bb568 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Fri, 6 Jun 2014 01:05:22 +0200
Subject: [PATCH 1/2] x64Emitter: add CVTTPD2DQ

---
 Source/Core/Common/x64Emitter.cpp | 1 +
 Source/Core/Common/x64Emitter.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 59b764389c..b4ec2bc331 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -1405,6 +1405,7 @@ void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, reg
 
 void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
 void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
+void XEmitter::CVTTPD2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0xE6, true, xregdest, arg);}  // packed double -> packed s32, truncating
 
 void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src)  {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
 
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index eac659b06b..8d7742d33b 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -576,6 +576,7 @@ public:
 
 	void CVTTSS2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTPS2DQ(X64Reg regOp, OpArg arg);
+	void CVTTPD2DQ(X64Reg regOp, OpArg arg);  // packed double -> packed s32 with truncation; regOp is the destination
 
 	// SSE2: Packed integer instructions
 	void PACKSSDW(X64Reg dest, OpArg arg);

From 6521929f99d062fdd4d740c65cc7a7f0d48c7ed3 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Sun, 8 Jun 2014 14:30:59 +0200
Subject: [PATCH 2/2] Jit64: implement fctiw/fctiwz

---
 Source/Core/Core/PowerPC/Jit64/Jit.h          |  1 +
 .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp  |  4 +-
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp  | 42 +++++++++++++++++++
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index d273d32aaf..8c5056c372 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -174,6 +174,7 @@ public:
 	void fp_arith(UGeckoInstruction inst);
 
 	void fcmpx(UGeckoInstruction inst);
+	void fctiwx(UGeckoInstruction inst);  // handles both fctiwx and fctiwzx
 	void fmrx(UGeckoInstruction inst);
 
 	void cmpXX(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
index 0184205f1f..c64cbdca66 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
@@ -337,8 +337,8 @@ static GekkoOPTemplate table63[] =
 	{264, &Jit64::fsign},                 //"fabsx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{32,  &Jit64::fcmpx},                 //"fcmpo",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{0,   &Jit64::fcmpx},                 //"fcmpu",   OPTYPE_FPU, FL_RC_BIT_F}},
-	{14,  &Jit64::FallBackToInterpreter}, //"fctiwx",  OPTYPE_FPU, FL_RC_BIT_F}},
-	{15,  &Jit64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{14,  &Jit64::fctiwx},                //"fctiwx",  OPTYPE_FPU, FL_RC_BIT_F}},
+	{15,  &Jit64::fctiwx},                //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{72,  &Jit64::fmrx},                  //"fmrx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{136, &Jit64::fsign},                 //"fnabsx",  OPTYPE_FPU, FL_RC_BIT_F}},
 	{40,  &Jit64::fsign},                 //"fnegx",   OPTYPE_FPU, FL_RC_BIT_F}},
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 0552a2c751..648da27d26 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -10,6 +10,7 @@
 
 static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
 static const u64 GC_ALIGNED16(psAbsMask2[2])  = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
+static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};  // [0]: s32 max (clamp bound); [1]: converts to s32 0xfff80000
 
 void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
 {
@@ -267,3 +268,44 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 
 	fpr.UnlockAll();
 }
+
+void Jit64::fctiwx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);  // record forms (Rc set) are not emitted here
+
+	int d = inst.RD;
+	int b = inst.RB;
+	fpr.Lock(d, b);
+	fpr.BindToRegister(d, true);  // always load d: only d[0..63] is written below, so d[64+] must hold its old value
+
+	// Shared by fctiw (SUBOP10 14) and fctiwz (SUBOP10 15): converts the double in b to an s32.
+	// Intel uses 0x80000000 as a generic error code while PowerPC uses clamping:
+	//
+	// input       | output fctiw | output CVTPD2DQ
+	// ------------+--------------+----------------
+	// > +2^31 - 1 | 0x7fffffff   | 0x80000000
+	// < -2^31     | 0x80000000   | 0x80000000
+	// any NaN     | 0x80000000   | 0x80000000
+	//
+	// The upper 32 bits of the result are set to 0xfff80000 (TODO: 0xfff80001 for an input of -0.0).
+
+	MOVAPD(XMM0, M(&half_qnan_and_s32_max));  // XMM0 = {2^31 - 1, -0x80000}; the high lane converts to 0xfff80000
+	MINSD(XMM0, fpr.R(b));  // clamps positive overflow; the high lane is untouched and a NaN in b passes through
+	switch (inst.SUBOP10)
+	{
+		// fctiwx: CVTPD2DQ rounds according to MXCSR
+		case 14:
+			CVTPD2DQ(XMM0, R(XMM0));
+			break;
+
+		// fctiwzx: CVTTPD2DQ always truncates (rounds toward zero)
+		case 15:
+			CVTTPD2DQ(XMM0, R(XMM0));
+			break;
+	}
+	// d[64+] must not be modified: a register-to-register MOVSD only writes d[0..63]
+	MOVSD(fpr.R(d), XMM0);
+	fpr.UnlockAll();
+}