From 6df48ed432921116841273225173319c901bb568 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Fri, 6 Jun 2014 01:05:22 +0200 Subject: [PATCH 1/2] x64Emitter: add CVTTPD2DQ --- Source/Core/Common/x64Emitter.cpp | 1 + Source/Core/Common/x64Emitter.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 59b764389c..b4ec2bc331 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1405,6 +1405,7 @@ void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, reg void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);} void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);} +void XEmitter::CVTTPD2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0xE6, true, xregdest, arg);} /* SSE2: packed double -> packed int32 conversion with truncation (round toward zero) */ void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));} diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index eac659b06b..8d7742d33b 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -576,6 +576,7 @@ public: void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! 
void CVTTPS2DQ(X64Reg regOp, OpArg arg); + void CVTTPD2DQ(X64Reg regOp, OpArg arg); // SSE2: Packed integer instructions void PACKSSDW(X64Reg dest, OpArg arg); From 6521929f99d062fdd4d740c65cc7a7f0d48c7ed3 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sun, 8 Jun 2014 14:30:59 +0200 Subject: [PATCH 2/2] Jit64: implement fctiw/fctiwz --- Source/Core/Core/PowerPC/Jit64/Jit.h | 1 + .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 4 +- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 42 +++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index d273d32aaf..8c5056c372 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -174,6 +174,7 @@ public: void fp_arith(UGeckoInstruction inst); void fcmpx(UGeckoInstruction inst); + void fctiwx(UGeckoInstruction inst); /* one JIT handler for both fctiw and fctiwz (table63 subops 14 and 15) */ void fmrx(UGeckoInstruction inst); void cmpXX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 0184205f1f..c64cbdca66 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -337,8 +337,8 @@ static GekkoOPTemplate table63[] = {264, &Jit64::fsign}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {32, &Jit64::fcmpx}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, {0, &Jit64::fcmpx}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, - {14, &Jit64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, - {15, &Jit64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, + {14, &Jit64::fctiwx}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, + {15, &Jit64::fctiwx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &Jit64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, {136, &Jit64::fsign}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {40, &Jit64::fsign}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 0552a2c751..648da27d26 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -10,6 +10,7 @@ static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; +static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000}; /* element 0: largest s32, the MINSD clamp bound in fctiwx; element 1: -0x80000, which the packed conversion turns into the 32-bit pattern 0xfff80000 */ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)) { @@ -267,3 +268,44 @@ void Jit64::fcmpx(UGeckoInstruction inst) fpr.UnlockAll(); } + +void Jit64::fctiwx(UGeckoInstruction inst) /* emits x64 code for fctiw (SUBOP10 == 14) and fctiwz (SUBOP10 == 15): convert the double in fpr b to a 32-bit integer stored in the low 64 bits of fpr d */ +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); /* the record (Rc) form is not handled here */ + + int d = inst.RD; + int b = inst.RB; + fpr.Lock(d, b); + fpr.BindToRegister(d, d == b); /* NOTE(review): presumably the second argument requests a load, i.e. d is loaded only when it aliases b -- confirm d[64+] is still valid at the final MOVSD when d was not already bound */ + + // Intel uses 0x80000000 as a generic error code while PowerPC uses clamping: + // + // input | output fctiw | output CVTPD2DQ + // ------------+--------------+---------------- + // > +2^31 - 1 | 0x7fffffff | 0x80000000 + // < -2^31 | 0x80000000 | 0x80000000 + // any NaN | 0x80000000 | 0x80000000 + // + // The upper 32 bits of the result are set to 0xfff80000, + // except for -0.0 where they are set to 0xfff80001 (TODO). + + MOVAPD(XMM0, M(&half_qnan_and_s32_max)); /* XMM0 = {s32 max, -0x80000} */ + MINSD(XMM0, fpr.R(b)); /* low lane = min(s32 max, b): only the first table row needs fixing up, the other two rows already agree */ + switch (inst.SUBOP10) + { + // fctiwx (CVTPD2DQ rounds according to the current MXCSR rounding mode) + case 14: + CVTPD2DQ(XMM0, R(XMM0)); + break; + + // fctiwzx (CVTTPD2DQ always truncates toward zero) + case 15: + CVTTPD2DQ(XMM0, R(XMM0)); + break; + } + // d[64+] must not be modified, hence MOVSD: only the low 64 bits (converted integer plus the 0xfff80000 upper word) are written + MOVSD(fpr.R(d), XMM0); + fpr.UnlockAll(); +}