From 7370473eb33d5f30ded2977966707c3283c80c6f Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 15:04:26 -0600
Subject: [PATCH] [AArch64] Implement 19 floating point instructions

Add JitArm64_FloatingPoint.cpp with JIT handlers for fabsx, faddsx,
faddx, fmaddsx, fmaddx, fmrx, fmsubsx, fmsubx, fmulsx, fmulx, fnabsx,
fnegx, fnmaddsx, fnmaddx, fnmsubsx, fnmsubx, fselx, fsubsx and fsubx.
Declare them in Jit.h, add the file to the ARM64 source list, and point
the table59 / table63 / table63_2 entries at the new handlers instead of
FallBackToInterpreter.

Every handler starts with INSTRUCTION_START,
JITDISABLE(bJITFloatingPointOff) and FALLBACK_IF(inst.Rc).

The *sx handlers write both 64-bit lanes of the destination register
(either INS lane 0 -> lane 1, or DUP from a scratch register). The other
handlers compute into a scratch register (fmrx copies directly) and INS
only lane 0 into the destination.

fselx takes all three source operands (FA, FB, FC) from the FPR cache.
---
NOTE(review): the handlers that write only lane 0 of FD pass a load flag
to fpr.BindToRegister that is true only when FD aliases a source operand.
Confirm that the register cache keeps lane 1 of FD intact when that flag
is false.

 Source/Core/Core/CMakeLists.txt               |   1 +
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  21 +
 .../JitArm64/JitArm64_FloatingPoint.cpp       | 376 ++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp |  38 +-
 4 files changed, 417 insertions(+), 19 deletions(-)
 create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp

diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 0eddc09a24..61462124dd 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -225,6 +225,7 @@ elseif(_M_ARM_64)
 			PowerPC/JitArm64/JitArm64_RegCache.cpp
 			PowerPC/JitArm64/JitArm64_BackPatch.cpp
 			PowerPC/JitArm64/JitArm64_Branch.cpp
+			PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
 			PowerPC/JitArm64/JitArm64_Integer.cpp
 			PowerPC/JitArm64/JitArm64_LoadStore.cpp
 			PowerPC/JitArm64/JitArm64_Paired.cpp
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 4e19f3b2e4..c979633ae2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -117,6 +117,27 @@ public:
 	void lXX(UGeckoInstruction inst);
 	void stX(UGeckoInstruction inst);
 
+	// Floating point
+	void fabsx(UGeckoInstruction inst);
+	void faddsx(UGeckoInstruction inst);
+	void faddx(UGeckoInstruction inst);
+	void fmaddsx(UGeckoInstruction inst);
+	void fmaddx(UGeckoInstruction inst);
+	void fmrx(UGeckoInstruction inst);
+	void fmsubsx(UGeckoInstruction inst);
+	void fmsubx(UGeckoInstruction inst);
+	void fmulsx(UGeckoInstruction inst);
+	void fmulx(UGeckoInstruction inst);
+	void fnabsx(UGeckoInstruction inst);
+	void fnegx(UGeckoInstruction inst);
+	void fnmaddsx(UGeckoInstruction inst);
+	void fnmaddx(UGeckoInstruction inst);
+	void fnmsubsx(UGeckoInstruction inst);
+	void fnmsubx(UGeckoInstruction inst);
+	void fselx(UGeckoInstruction inst);
+	void fsubsx(UGeckoInstruction inst);
+	void fsubx(UGeckoInstruction inst);
+
 	// Paired
 	void ps_abs(UGeckoInstruction inst);
 	void ps_add(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
new file mode 100644
index 0000000000..a670edccc8
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -0,0 +1,376 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "Common/Arm64Emitter.h"
+#include "Common/Common.h"
+#include "Common/StringUtil.h"
+
+#include "Core/Core.h"
+#include "Core/CoreTiming.h"
+#include "Core/PowerPC/PowerPC.h"
+#include "Core/PowerPC/PPCTables.h"
+#include "Core/PowerPC/JitArm64/Jit.h"
+#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
+#include "Core/PowerPC/JitArm64/JitAsm.h"
+
+using namespace Arm64Gen;
+
+void JitArm64::fabsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FABS(64, V0, VB); // V0 = |VB|
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::faddsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+
+	m_float_emit.FADD(64, VD, VA, VB); // VD = VA + VB
+	m_float_emit.INS(64, VD, 1, VD, 0); // VD[1] = VD[0]
+}
+
+void JitArm64::faddx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FADD(64, V0, VA, VB); // V0 = VA + VB
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fmaddsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FADD(64, V0, V0, VB); // V0 = VA * VC + VB (separate multiply and add)
+	m_float_emit.DUP(64, VD, V0, 0); // VD[0] = VD[1] = V0[0]
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fmaddx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FADD(64, V0, V0, VB); // V0 = VA * VC + VB (separate multiply and add)
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fmrx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+
+	m_float_emit.INS(64, VD, 0, VB, 0); // VD[0] = VB[0]; VD[1] is not written
+}
+
+void JitArm64::fmsubsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FSUB(64, V0, V0, VB); // V0 = VA * VC - VB
+	m_float_emit.DUP(64, VD, V0, 0); // VD[0] = VD[1] = V0[0]
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fmsubx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FSUB(64, V0, V0, VB); // V0 = VA * VC - VB
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fmulsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VC = fpr.R(inst.FC);
+	ARM64Reg VD = fpr.R(inst.FD);
+
+	m_float_emit.FMUL(64, VD, VA, VC); // VD = VA * VC
+	m_float_emit.INS(64, VD, 1, VD, 0); // VD[1] = VD[0]
+}
+
+void JitArm64::fmulx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VC = fpr.R(inst.FC);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnabsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FABS(64, V0, VB); // V0 = |VB|
+	m_float_emit.FNEG(64, V0, V0); // V0 = -|VB|
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnegx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FNEG(64, V0, VB); // V0 = -VB
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnmaddsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FADD(64, V0, V0, VB); // V0 = VA * VC + VB
+	m_float_emit.FNEG(64, V0, V0); // V0 = -(VA * VC + VB)
+	m_float_emit.DUP(64, VD, V0, 0); // VD[0] = VD[1] = V0[0]
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnmaddx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FADD(64, V0, V0, VB); // V0 = VA * VC + VB
+	m_float_emit.FNEG(64, V0, V0); // V0 = -(VA * VC + VB)
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnmsubsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FSUB(64, V0, V0, VB); // V0 = VA * VC - VB
+	m_float_emit.FNEG(64, V0, V0); // V0 = -(VA * VC - VB)
+	m_float_emit.DUP(64, VD, V0, 0); // VD[0] = VD[1] = V0[0]
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fnmsubx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FMUL(64, V0, VA, VC); // V0 = VA * VC
+	m_float_emit.FSUB(64, V0, V0, VB); // V0 = VA * VC - VB
+	m_float_emit.FNEG(64, V0, V0); // V0 = -(VA * VC - VB)
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fselx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+	fpr.BindToRegister(inst.FD,
+	                   inst.FD == inst.FA ||
+	                   inst.FD == inst.FB ||
+	                   inst.FD == inst.FC);
+
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VC = fpr.R(inst.FC); // FC is a floating point operand: take it from the FPR cache, not gpr
+
+	m_float_emit.FCMPE(VA); // single-operand compare; presumably VA against 0.0 - confirm in Arm64Emitter
+	m_float_emit.FCSEL(V0, VC, VB, CC_GE); // V0 = (GE) ? VC : VB
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::fsubsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+
+	m_float_emit.FSUB(64, VD, VA, VB); // VD = VA - VB
+	m_float_emit.INS(64, VD, 1, VD, 0); // VD[1] = VD[0]
+}
+
+void JitArm64::fsubx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(inst.Rc);
+
+	fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
+	ARM64Reg VA = fpr.R(inst.FA);
+	ARM64Reg VB = fpr.R(inst.FB);
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg V0 = fpr.GetReg(); // scratch register, released below
+
+	m_float_emit.FSUB(64, V0, VA, VB); // V0 = VA - VB
+	m_float_emit.INS(64, VD, 0, V0, 0); // VD[0] = V0[0]; VD[1] is not written
+
+	fpr.Unlock(V0);
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index eb0d5fb141..3d67cb4d03 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -323,27 +323,27 @@ static GekkoOPTemplate table31_2[] =
 static GekkoOPTemplate table59[] =
 {
 	{18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
-	{20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{20, &JitArm64::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{21, &JitArm64::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
 //	{22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{25, &JitArm64::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{28, &JitArm64::fmsubsx}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{29, &JitArm64::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{30, &JitArm64::fnmsubsx}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm64::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
 };
 
 static GekkoOPTemplate table63[] =
 {
-	{264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{264, &JitArm64::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
 	{0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
 	{14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{72, &JitArm64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{136, &JitArm64::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{40, &JitArm64::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
 
 	{64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
@@ -357,16 +357,16 @@ static GekkoOPTemplate table63[] =
 static GekkoOPTemplate table63_2[] =
 {
 	{18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
-	{20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{20, &JitArm64::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{21, &JitArm64::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{23, &JitArm64::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{25, &JitArm64::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
-	{28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{28, &JitArm64::fmsubx}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{29, &JitArm64::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{30, &JitArm64::fnmsubx}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm64::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
 };