From 561744819e534e5d3981f8e1835ecbb2ba7c96c8 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 23 Aug 2015 15:35:18 -0500 Subject: [PATCH] [AArch64] Implement fctiwzx Improves the povray benchmark time by 5.6% --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../JitArm64/JitArm64_FloatingPoint.cpp | 35 +++++++++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 2a851e3339..b403d54103 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -147,6 +147,7 @@ public: void fsubx(UGeckoInstruction inst); void fcmpx(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); + void fctiwzx(UGeckoInstruction inst); // Paired void ps_abs(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 690adcc983..c6c00fad3c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -558,3 +558,38 @@ void JitArm64::fcmpx(UGeckoInstruction inst) gpr.Unlock(WA); } + +void JitArm64::fctiwzx(UGeckoInstruction inst) /* Emits the AArch64 sequence for fctiwzx: narrows FPR b with FCVTN, converts it with ROUND_Z, ORs in 0xFFF8000000000000 and writes the result to FPR d. */ +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); /* the Rc form is not emitted here; presumably it is left to the interpreter - confirm */ + + u32 b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == b); /* second argument is true only when source and destination are the same FPR */ + + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + ARM64Reg V0 = fpr.GetReg(); /* scratch for the constant built below; released by fpr.Unlock(V0) at the end */ + + // Generate 0xFFF8000000000000ULL + m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL); + m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); /* presumably clears 0x7 in each 16-bit lane, turning the 0xFFFF lane into 0xFFF8 while the other lanes stay zero */ + if (fpr.IsLower(d)) /* presumably only the lower half of d is live, so VD can be written directly - confirm IsLower semantics */ + { + m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); /* NOTE(review): narrowing to single precision before the integer conversion can round the input (values needing more than 24 significant bits, or fractions just below an integer) - confirm results match the interpreter */ + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z); /* in-place conversion using the round-toward-zero mode */ + m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); /* merge the constant from V0 into the 64-bit result */ + } + else + { + 
ARM64Reg V1 = fpr.GetReg(); /* the conversion is done in this second scratch instead of VD; released by fpr.Unlock(V1) below */ + m_float_emit.FCVTN(32, EncodeRegToDouble(V1), EncodeRegToDouble(VB)); /* NOTE(review): same narrowing-before-conversion concern as the other branch - confirm */ + m_float_emit.FCVTS(EncodeRegToSingle(V1), EncodeRegToSingle(V1), ROUND_Z); + m_float_emit.ORR(EncodeRegToDouble(V1), EncodeRegToDouble(V1), EncodeRegToDouble(V0)); + m_float_emit.INS(64, VD, 0, V1, 0); /* presumably copies 64-bit element 0 of V1 into element 0 of VD, leaving the rest of VD intact - confirm */ + fpr.Unlock(V1); + } + fpr.Unlock(V0); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index a2c71acf91..153725a426 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -330,7 +330,7 @@ static GekkoOPTemplate table63[] = {32, &JitArm64::fcmpx}, // fcmpo {0, &JitArm64::fcmpx}, // fcmpu {14, &JitArm64::FallBackToInterpreter}, // fctiwx - {15, &JitArm64::FallBackToInterpreter}, // fctiwzx + {15, &JitArm64::fctiwzx}, // fctiwzx {72, &JitArm64::fmrx}, // fmrx {136, &JitArm64::fnabsx}, // fnabsx {40, &JitArm64::fnegx}, // fnegx