[AArch64] Implement 19 floating point instructions

This commit is contained in:
Ryan Houdek 2015-01-07 15:04:26 -06:00
parent 6dff4421d3
commit 7370473eb3
4 changed files with 417 additions and 19 deletions

View File

@ -225,6 +225,7 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_RegCache.cpp
PowerPC/JitArm64/JitArm64_BackPatch.cpp
PowerPC/JitArm64/JitArm64_Branch.cpp
PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp
PowerPC/JitArm64/JitArm64_Paired.cpp

View File

@ -117,6 +117,27 @@ public:
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
// Floating point
void fabsx(UGeckoInstruction inst);
void faddsx(UGeckoInstruction inst);
void faddx(UGeckoInstruction inst);
void fmaddsx(UGeckoInstruction inst);
void fmaddx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);
void fmsubsx(UGeckoInstruction inst);
void fmsubx(UGeckoInstruction inst);
void fmulsx(UGeckoInstruction inst);
void fmulx(UGeckoInstruction inst);
void fnabsx(UGeckoInstruction inst);
void fnegx(UGeckoInstruction inst);
void fnmaddsx(UGeckoInstruction inst);
void fnmaddx(UGeckoInstruction inst);
void fnmsubsx(UGeckoInstruction inst);
void fnmsubx(UGeckoInstruction inst);
void fselx(UGeckoInstruction inst);
void fsubsx(UGeckoInstruction inst);
void fsubx(UGeckoInstruction inst);
// Paired
void ps_abs(UGeckoInstruction inst);
void ps_add(UGeckoInstruction inst);

View File

@ -0,0 +1,376 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::fabsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(64, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::faddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FADD(64, VD, VA, VB);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::faddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FADD(64, V0, VA, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmaddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmaddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.INS(64, VD, 0, VB, 0);
}
void JitArm64::fmsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmulsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FMUL(64, VD, VA, VC);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fmulx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnabsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(64, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnegx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FNEG(64, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmaddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmaddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fselx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD,
inst.FD == inst.FA ||
inst.FD == inst.FB ||
inst.FD == inst.FC);
ARM64Reg V0 = fpr.GetReg();
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VC = gpr.R(inst.FC);
m_float_emit.FCMPE(VA);
m_float_emit.FCSEL(V0, VC, VB, CC_GE);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FSUB(64, VD, VA, VB);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FSUB(64, V0, VA, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}

View File

@ -323,27 +323,27 @@ static GekkoOPTemplate table31_2[] =
static GekkoOPTemplate table59[] =
{
{18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{20, &JitArm64::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
// {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
{24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::fmsubsx}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::fnmsubsx}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
};
static GekkoOPTemplate table63[] =
{
{264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{264, &JitArm64::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm64::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm64::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
@ -357,16 +357,16 @@ static GekkoOPTemplate table63[] =
static GekkoOPTemplate table63_2[] =
{
{18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{20, &JitArm64::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArm64::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::fmsubx}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::fnmsubx}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
};