[AArch64] Implements 15 floating loadstores.

This commit is contained in:
Ryan Houdek 2015-01-07 15:09:07 -06:00
parent 7370473eb3
commit 5a28883f9e
5 changed files with 431 additions and 16 deletions

View File

@ -228,6 +228,7 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp
PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
PowerPC/JitArm64/JitArm64_Paired.cpp
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
PowerPC/JitArm64/JitArm64_Tables.cpp)

View File

@ -16,6 +16,7 @@ void JitArm64::Init()
{
AllocCodeSpace(CODE_SIZE);
jo.enableBlocklink = true;
jo.optimizeGatherPipe = true;
gpr.Init(this);
fpr.Init(this);
@ -289,6 +290,21 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.next_compilerPC = ops[i + 1].address;
}
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
gpr.Lock(W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
regs_in_use[W30] = 0;
ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe);
BLR(X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(W30);
}
if (!ops[i].skip)
{
if (js.memcheck && (opinfo->flags & FL_USE_FPU))

View File

@ -117,6 +117,10 @@ public:
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
// LoadStore floating point
void lfXX(UGeckoInstruction inst);
void stfXX(UGeckoInstruction inst);
// Floating point
void fabsx(UGeckoInstruction inst);
void faddsx(UGeckoInstruction inst);

View File

@ -0,0 +1,394 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
s32 offset_reg = -1;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 567: // lfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offset_reg = b;
break;
case 535: // lfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offset_reg = b;
break;
case 599: // lfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offset_reg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 48: // lfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg addr_reg = W0;
gpr.Lock(W0, W30);
fpr.Lock(Q0);
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offset_reg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else
{
if (offset_reg == -1)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
}
}
}
else
{
if (offset_reg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else if (!a && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offset_reg);
}
else if (a)
{
ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
}
else
{
MOV(addr_reg, gpr.R(offset_reg));
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(a), addr_reg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 fpr_ignore_mask(0);
regs_in_use[W0] = 0;
regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0
fpr_ignore_mask[VD - Q0] = 1;
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, VD, XA);
}
else
{
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
VD, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W30);
fpr.Unlock(Q0);
}
void JitArm64::stfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_STORE;
bool update = false;
s32 offset_reg = -1;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 663: // stfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 695: // stfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 727: // stfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offset_reg = b;
break;
case 759: // stfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offset_reg = b;
break;
}
break;
case 53: // stfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 52: // stfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 55: // stfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 54: // stfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg V0 = fpr.R(inst.FS);
ARM64Reg addr_reg;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
addr_reg = W0;
else
addr_reg = W1;
gpr.Lock(W0, W1, W30);
fpr.Lock(Q0);
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offset_reg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else
{
if (offset_reg == -1)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
}
}
}
else
{
if (offset_reg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else if (!a && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offset_reg);
}
else if (a)
{
ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
}
else
{
MOV(addr_reg, gpr.R(offset_reg));
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(a), addr_reg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0
if (is_immediate)
{
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
accessSize = 64;
else
accessSize = 32;
MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
LDR(INDEX_UNSIGNED, W0, X30, 0);
ADD(X1, X1, X0);
if (accessSize == 64)
{
m_float_emit.REV64(8, Q0, V0);
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
}
else if (accessSize == 32)
{
m_float_emit.FCVT(32, 64, Q0, V0);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
}
ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, 0);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, V0, XA);
}
else
{
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags, false, false, V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use);
}
}
else
{
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W1, W30);
fpr.Unlock(Q0);
}

View File

@ -84,15 +84,15 @@ static GekkoOPTemplate primarytable[] =
{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{48, &JitArm64::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm64::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm64::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm64::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm64::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm64::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
@ -255,15 +255,15 @@ static GekkoOPTemplate table31[] =
{725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
// fp load/store
{535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{535, &JitArm64::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm64::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm64::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm64::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{663, &JitArm64::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm64::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm64::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm64::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},