[AArch64] Implement 24 paired instructions.

This commit is contained in:
Ryan Houdek 2015-01-07 15:02:11 -06:00
parent 602702fdcb
commit 6dff4421d3
4 changed files with 546 additions and 24 deletions

View File

@ -227,6 +227,7 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_Branch.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp
PowerPC/JitArm64/JitArm64_Paired.cpp
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
PowerPC/JitArm64/JitArm64_Tables.cpp)
endif()

View File

@ -117,6 +117,32 @@ public:
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
// Paired
void ps_abs(UGeckoInstruction inst);
void ps_add(UGeckoInstruction inst);
void ps_div(UGeckoInstruction inst);
void ps_madd(UGeckoInstruction inst);
void ps_madds0(UGeckoInstruction inst);
void ps_madds1(UGeckoInstruction inst);
void ps_merge00(UGeckoInstruction inst);
void ps_merge01(UGeckoInstruction inst);
void ps_merge10(UGeckoInstruction inst);
void ps_merge11(UGeckoInstruction inst);
void ps_mr(UGeckoInstruction inst);
void ps_msub(UGeckoInstruction inst);
void ps_mul(UGeckoInstruction inst);
void ps_muls0(UGeckoInstruction inst);
void ps_muls1(UGeckoInstruction inst);
void ps_nabs(UGeckoInstruction inst);
void ps_nmadd(UGeckoInstruction inst);
void ps_nmsub(UGeckoInstruction inst);
void ps_neg(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst);
void ps_sub(UGeckoInstruction inst);
void ps_sum0(UGeckoInstruction inst);
void ps_sum1(UGeckoInstruction inst);
private:
Arm64GPRCache gpr;
Arm64FPRCache fpr;

View File

@ -0,0 +1,495 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::ps_abs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(64, VD, VB);
}
void JitArm64::ps_add(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FADD(64, VD, VA, VB);
}
void JitArm64::ps_div(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FDIV(64, VD, VA, VB);
}
void JitArm64::ps_madd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_madds0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
m_float_emit.FMUL(64, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_madds1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
m_float_emit.FMUL(64, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_merge00(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.TRN1(64, VD, VA, VB);
}
void JitArm64::ps_merge01(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.INS(64, VD, 0, VA, 0);
m_float_emit.INS(64, VD, 1, VB, 1);
}
void JitArm64::ps_merge10(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
if (d != a && d != b)
{
m_float_emit.INS(64, VD, 0, VA, 1);
m_float_emit.INS(64, VD, 1, VB, 0);
}
else
{
ARM64Reg V0 = fpr.GetReg();
m_float_emit.INS(64, V0, 0, VA, 1);
m_float_emit.INS(64, V0, 1, VB, 0);
m_float_emit.ORR(VD, V0, V0);
fpr.Unlock(V0);
}
}
void JitArm64::ps_merge11(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.TRN2(64, VD, VA, VB);
}
void JitArm64::ps_mr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
if (d == b)
return;
fpr.BindToRegister(d, false);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.ORR(VD, VB, VB);
}
void JitArm64::ps_mul(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMUL(64, VD, VA, VC);
}
void JitArm64::ps_muls0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
m_float_emit.FMUL(64, VD, VA, V0);
fpr.Unlock(V0);
}
void JitArm64::ps_muls1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
m_float_emit.FMUL(64, VD, VA, V0);
fpr.Unlock(V0);
}
void JitArm64::ps_msub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_nabs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(64, VD, VB);
m_float_emit.FNEG(64, VD, VD);
}
void JitArm64::ps_neg(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FNEG(64, VD, VB);
}
void JitArm64::ps_nmadd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.Unlock(V0);
}
void JitArm64::ps_nmsub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.Unlock(V0);
}
void JitArm64::ps_res(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FRSQRTE(64, VD, VB);
}
void JitArm64::ps_sel(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
if (d != a && d != b && d != c)
{
m_float_emit.FCMGE(64, VD, VA);
m_float_emit.BSL(VD, VC, VB);
}
else
{
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FCMGE(64, V0, VA);
m_float_emit.BSL(V0, VC, VB);
m_float_emit.ORR(VD, V0, V0);
fpr.Unlock(V0);
}
}
void JitArm64::ps_sub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FSUB(64, VD, VA, VB);
}
void JitArm64::ps_sum0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VB, 1);
if (d != c)
{
m_float_emit.FADD(64, VD, V0, VA);
m_float_emit.INS(64, VD, 1, VC, 1);
}
else
{
m_float_emit.FADD(64, V0, V0, VA);
m_float_emit.INS(64, VD, 0, V0, 0);
}
fpr.Unlock(V0);
}
void JitArm64::ps_sum1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VA, 0);
if (d != c)
{
m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.INS(64, VD, 0, VC, 0);
}
else
{
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.INS(64, VD, 1, V0, 1);
}
fpr.Unlock(V0);
}

View File

@ -114,39 +114,39 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{40, &JitArm64::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm64::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm64::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm64::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm64::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm64::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm64::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
};
static GekkoOPTemplate table4_2[] =
{
{10, &JitArm64::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArm64::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm64::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm64::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArm64::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArm64::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArm64::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}},
{20, &JitArm64::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}},
{21, &JitArm64::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}},
{23, &JitArm64::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}},
{24, &JitArm64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
{25, &JitArm64::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}},
{10, &JitArm64::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArm64::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm64::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm64::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArm64::ps_madds0}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArm64::ps_madds1}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArm64::ps_div}, //"ps_div", OPTYPE_PS, 0, 16}},
{20, &JitArm64::ps_sub}, //"ps_sub", OPTYPE_PS, 0}},
{21, &JitArm64::ps_add}, //"ps_add", OPTYPE_PS, 0}},
{23, &JitArm64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
{24, &JitArm64::ps_res}, //"ps_res", OPTYPE_PS, 0}},
{25, &JitArm64::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
{26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &JitArm64::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}},
{29, &JitArm64::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}},
{30, &JitArm64::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}},
{31, &JitArm64::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}},
{28, &JitArm64::ps_msub}, //"ps_msub", OPTYPE_PS, 0}},
{29, &JitArm64::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
{30, &JitArm64::ps_nmsub}, //"ps_nmsub", OPTYPE_PS, 0}},
{31, &JitArm64::ps_nmadd}, //"ps_nmadd", OPTYPE_PS, 0}},
};