Merge pull request #1852 from Sonicadvance1/AArch64_Recompiler_Improvements

Implements a large portion of the recompiler for AArch64
This commit is contained in:
Ryan Houdek 2015-01-11 07:08:16 -06:00
commit 83a415198d
13 changed files with 2147 additions and 157 deletions

View File

@ -225,8 +225,11 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_RegCache.cpp
PowerPC/JitArm64/JitArm64_BackPatch.cpp
PowerPC/JitArm64/JitArm64_Branch.cpp
PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp
PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
PowerPC/JitArm64/JitArm64_Paired.cpp
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
PowerPC/JitArm64/JitArm64_Tables.cpp)
endif()

View File

@ -16,6 +16,7 @@ void JitArm64::Init()
{
AllocCodeSpace(CODE_SIZE);
jo.enableBlocklink = true;
jo.optimizeGatherPipe = true;
gpr.Init(this);
fpr.Init(this);
@ -179,6 +180,14 @@ void JitArm64::WriteExitDestInR(ARM64Reg Reg)
BR(EncodeRegTo64(Reg));
}
void JitArm64::DumpCode(const u8* start, const u8* end)
{
std::string output = "";
for (u8* code = (u8*)start; code < end; code += 4)
output += StringFromFormat("%08x", Common::swap32(*(u32*)code));
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
}
void JitArm64::Run()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
@ -281,6 +290,21 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.next_compilerPC = ops[i + 1].address;
}
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
gpr.Lock(W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
regs_in_use[W30] = 0;
ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe);
BLR(X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(W30);
}
if (!ops[i].skip)
{
if (js.memcheck && (opinfo->flags & FL_USE_FPU))
@ -294,6 +318,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreRegister(j);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
{

View File

@ -21,11 +21,13 @@
// Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock
{
public:
JitArm64() : code_buffer(32000) {}
JitArm64() : code_buffer(32000), m_float_emit(this) {}
~JitArm64() {}
void Init();
@ -80,6 +82,7 @@ public:
// Integer
void arith_imm(UGeckoInstruction inst);
void boolX(UGeckoInstruction inst);
void addx(UGeckoInstruction inst);
void extsXx(UGeckoInstruction inst);
void cntlzwx(UGeckoInstruction inst);
void negx(UGeckoInstruction inst);
@ -87,6 +90,14 @@ public:
void cmpl(UGeckoInstruction inst);
void cmpi(UGeckoInstruction inst);
void cmpli(UGeckoInstruction inst);
void rlwinmx(UGeckoInstruction inst);
void srawix(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst);
void addic(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst);
// System Registers
void mtmsr(UGeckoInstruction inst);
@ -97,12 +108,66 @@ public:
void mfsrin(UGeckoInstruction inst);
void mtsrin(UGeckoInstruction inst);
void twx(UGeckoInstruction inst);
void mfspr(UGeckoInstruction inst);
void mftb(UGeckoInstruction inst);
void mtspr(UGeckoInstruction inst);
// LoadStore
void icbi(UGeckoInstruction inst);
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
// LoadStore floating point
void lfXX(UGeckoInstruction inst);
void stfXX(UGeckoInstruction inst);
// Floating point
void fabsx(UGeckoInstruction inst);
void faddsx(UGeckoInstruction inst);
void faddx(UGeckoInstruction inst);
void fmaddsx(UGeckoInstruction inst);
void fmaddx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);
void fmsubsx(UGeckoInstruction inst);
void fmsubx(UGeckoInstruction inst);
void fmulsx(UGeckoInstruction inst);
void fmulx(UGeckoInstruction inst);
void fnabsx(UGeckoInstruction inst);
void fnegx(UGeckoInstruction inst);
void fnmaddsx(UGeckoInstruction inst);
void fnmaddx(UGeckoInstruction inst);
void fnmsubsx(UGeckoInstruction inst);
void fnmsubx(UGeckoInstruction inst);
void fselx(UGeckoInstruction inst);
void fsubsx(UGeckoInstruction inst);
void fsubx(UGeckoInstruction inst);
// Paired
void ps_abs(UGeckoInstruction inst);
void ps_add(UGeckoInstruction inst);
void ps_div(UGeckoInstruction inst);
void ps_madd(UGeckoInstruction inst);
void ps_madds0(UGeckoInstruction inst);
void ps_madds1(UGeckoInstruction inst);
void ps_merge00(UGeckoInstruction inst);
void ps_merge01(UGeckoInstruction inst);
void ps_merge10(UGeckoInstruction inst);
void ps_merge11(UGeckoInstruction inst);
void ps_mr(UGeckoInstruction inst);
void ps_msub(UGeckoInstruction inst);
void ps_mul(UGeckoInstruction inst);
void ps_muls0(UGeckoInstruction inst);
void ps_muls1(UGeckoInstruction inst);
void ps_nabs(UGeckoInstruction inst);
void ps_nmadd(UGeckoInstruction inst);
void ps_nmsub(UGeckoInstruction inst);
void ps_neg(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst);
void ps_sub(UGeckoInstruction inst);
void ps_sum0(UGeckoInstruction inst);
void ps_sum1(UGeckoInstruction inst);
private:
Arm64GPRCache gpr;
Arm64FPRCache fpr;
@ -112,6 +177,11 @@ private:
PPCAnalyst::CodeBuffer code_buffer;
ARM64FloatEmitter m_float_emit;
// Dump a memory range of code
void DumpCode(const u8* start, const u8* end);
// The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info;
@ -137,6 +207,8 @@ private:
void ComputeRC(Arm64Gen::ARM64Reg reg, int crf = 0);
void ComputeRC(u32 imm, int crf = 0);
void ComputeCarry(bool Carry);
void ComputeCarry();
typedef u32 (*Operation)(u32, u32);
void reg_imm(u32 d, u32 a, bool binary, u32 value, Operation do_op, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, ArithOption), bool Rc = false);

View File

@ -29,7 +29,8 @@ static void DoBacktrace(uintptr_t access_address, SContext* ctx)
for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16)
{
pc_memory += StringFromFormat("%08x%08x%08x%08x",
*(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12));
Common::swap32(*(u32*)pc), Common::swap32(*(u32*)(pc + 4)),
Common::swap32(*(u32*)(pc + 8)), Common::swap32(*(u32*)(pc + 12)));
ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x",
pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12));
@ -51,10 +52,34 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
*flags |= BackPatchInfo::FLAG_SIZE_8;
else if (size == 1) // 16-bit
*flags |= BackPatchInfo::FLAG_SIZE_16;
else // 32-bit
else if (size == 2) // 32-bit
*flags |= BackPatchInfo::FLAG_SIZE_32;
else if (size == 3) // 64-bit
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (op == 0xE5) // Load
if (op == 0xF5) // NEON LDR
{
if (size == 2) // 32-bit float
{
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
*flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F);
return true;
}
else if (op == 0xF4) // NEON STR
{
if (size == 2) // 32-bit float
{
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
*flags |= BackPatchInfo::FLAG_STORE;
*reg = (ARM64Reg)(inst & 0x1F);
return true;
}
else if (op == 0xE5) // Load
{
*flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F);
@ -90,10 +115,38 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.FCVT(32, 64, Q0, RS);
float_emit.REV32(8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
}
else
{
float_emit.REV64(8, Q0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
ARM64FloatEmitter float_emit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.LD1R(32, RS, addr);
float_emit.REV64(8, RS, RS);
float_emit.FCVTL(64, RS, RS);
}
else
{
float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
float_emit.REV64(8, Q0, Q0);
float_emit.INS(64, RS, 0, Q0, 0);
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
@ -143,10 +196,39 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.FCVT(32, 64, Q0, RS);
float_emit.FMOV(32, false, W0, Q0);
emit->MOVI2R(X30, (u64)&Memory::Write_U32);
emit->BLR(X30);
}
else
{
emit->MOVI2R(X30, (u64)&Memory::Write_F64);
float_emit.DUP(64, Q0, RS);
emit->BLR(X30);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOVI2R(X30, (u64)&Memory::Read_U32);
emit->BLR(X30);
float_emit.DUP(32, RS, X0);
float_emit.FCVTL(64, RS, RS);
}
else
{
emit->MOVI2R(X30, (u64)&Memory::Read_F64);
emit->BLR(X30);
float_emit.INS(64, RS, 0, X0);
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
@ -245,7 +327,8 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
ctx->CTX_PC = new_pc;
// Wipe the top bits of the addr_register
if (flags & BackPatchInfo::FLAG_STORE)
if (flags & BackPatchInfo::FLAG_STORE &&
!(flags & BackPatchInfo::FLAG_SIZE_F64))
ctx->CTX_REG(1) &= 0xFFFFFFFFUll;
else
ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
@ -382,6 +465,46 @@ void JitArm64::InitBackpatch()
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
@ -446,6 +569,46 @@ void JitArm64::InitBackpatch()
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}

View File

@ -0,0 +1,376 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
// Emits FABS on FB into a temporary register and inserts the 64-bit result into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_source = inst.FD == inst.FB;
	fpr.BindToRegister(inst.FD, dest_is_source);

	ARM64Reg src = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FABS(64, tmp, src);
	// INS with both indices 0: presumably writes only element 0 of dest - confirm against the emitter
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FADD of FA and FB straight into FD, followed by an INS from index 0 to index 1 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::faddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FB);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_b = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);

	m_float_emit.FADD(64, dest, src_a, src_b);
	// presumably duplicates element 0 into element 1 - confirm against the emitter
	m_float_emit.INS(64, dest, 1, dest, 0);
}
// Emits FADD of FA and FB into a temporary register and inserts the 64-bit result into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::faddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FB);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_b = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FADD(64, tmp, src_a, src_b);
	// Only index 0 of dest is targeted by the insert.
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FA * FC + FB as a separate FMUL and FADD in a temporary register,
// then DUPs index 0 of the temporary into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmaddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FADD(64, tmp, tmp, src_b);
	m_float_emit.DUP(64, dest, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FA * FC + FB as a separate FMUL and FADD in a temporary register,
// then inserts the 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmaddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FADD(64, tmp, tmp, src_b);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits a single INS that moves 64-bit index 0 of FB into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_source = inst.FD == inst.FB;
	fpr.BindToRegister(inst.FD, dest_is_source);

	ARM64Reg src = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);

	m_float_emit.INS(64, dest, 0, src, 0);
}
// Emits FA * FC - FB as a separate FMUL and FSUB in a temporary register,
// then DUPs index 0 of the temporary into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmsubsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FSUB(64, tmp, tmp, src_b);
	m_float_emit.DUP(64, dest, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FA * FC - FB as a separate FMUL and FSUB in a temporary register,
// then inserts the 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmsubx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FSUB(64, tmp, tmp, src_b);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FMUL of FA and FC straight into FD, followed by an INS from index 0 to index 1 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmulsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FC);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_c = fpr.R(inst.FC);
	ARM64Reg dest = fpr.R(inst.FD);

	m_float_emit.FMUL(64, dest, src_a, src_c);
	// presumably duplicates element 0 into element 1 - confirm against the emitter
	m_float_emit.INS(64, dest, 1, dest, 0);
}
// Emits FMUL of FA and FC into a temporary register and inserts the 64-bit result into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fmulx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FC);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_c = fpr.R(inst.FC);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FABS followed by FNEG on FB in a temporary register,
// then inserts the 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_source = inst.FD == inst.FB;
	fpr.BindToRegister(inst.FD, dest_is_source);

	ARM64Reg src = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FABS(64, tmp, src);
	m_float_emit.FNEG(64, tmp, tmp);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits FNEG on FB into a temporary register and inserts the 64-bit result into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnegx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_source = inst.FD == inst.FB;
	fpr.BindToRegister(inst.FD, dest_is_source);

	ARM64Reg src = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FNEG(64, tmp, src);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits -(FA * FC + FB) as FMUL, FADD and FNEG in a temporary register,
// then DUPs index 0 of the temporary into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnmaddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FADD(64, tmp, tmp, src_b);
	m_float_emit.FNEG(64, tmp, tmp);
	m_float_emit.DUP(64, dest, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits -(FA * FC + FB) as FMUL, FADD and FNEG in a temporary register,
// then inserts the 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnmaddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FADD(64, tmp, tmp, src_b);
	m_float_emit.FNEG(64, tmp, tmp);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits -(FA * FC - FB) as FMUL, FSUB and FNEG in a temporary register,
// then DUPs index 0 of the temporary into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnmsubsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FSUB(64, tmp, tmp, src_b);
	m_float_emit.FNEG(64, tmp, tmp);
	m_float_emit.DUP(64, dest, tmp, 0);

	fpr.Unlock(tmp);
}
// Emits -(FA * FC - FB) as FMUL, FSUB and FNEG in a temporary register,
// then inserts the 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fnmsubx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dest_is_input = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dest_is_input);

	ARM64Reg src_a = fpr.R(a);
	ARM64Reg src_b = fpr.R(b);
	ARM64Reg src_c = fpr.R(c);
	ARM64Reg dest = fpr.R(d);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FMUL(64, tmp, src_a, src_c);
	m_float_emit.FSUB(64, tmp, tmp, src_b);
	m_float_emit.FNEG(64, tmp, tmp);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}
// fsel: selects FC when the FCMPE on FA yields GE, otherwise FB, and inserts the
// 64-bit result into index 0 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fselx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	fpr.BindToRegister(inst.FD,
	                   inst.FD == inst.FA ||
	                   inst.FD == inst.FB ||
	                   inst.FD == inst.FC);

	ARM64Reg V0 = fpr.GetReg();
	ARM64Reg VD = fpr.R(inst.FD);
	ARM64Reg VA = fpr.R(inst.FA);
	ARM64Reg VB = fpr.R(inst.FB);
	// FC is a floating-point operand, so it must come from the FPR cache.
	// Fetching it from the GPR cache would hand FCSEL an unrelated integer host register.
	ARM64Reg VC = fpr.R(inst.FC);

	// Single-operand FCMPE: presumably compares VA against zero - confirm against the emitter
	m_float_emit.FCMPE(VA);
	m_float_emit.FCSEL(V0, VC, VB, CC_GE);
	m_float_emit.INS(64, VD, 0, V0, 0);

	fpr.Unlock(V0);
}
// Emits FSUB of FA and FB straight into FD, followed by an INS from index 0 to index 1 of FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fsubsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FB);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_b = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);

	m_float_emit.FSUB(64, dest, src_a, src_b);
	// presumably duplicates element 0 into element 1 - confirm against the emitter
	m_float_emit.INS(64, dest, 1, dest, 0);
}
// Emits FSUB of FA and FB into a temporary register and inserts the 64-bit result into FD.
// Falls back to the interpreter when Rc is set.
void JitArm64::fsubx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const bool dest_is_input = (inst.FD == inst.FA) || (inst.FD == inst.FB);
	fpr.BindToRegister(inst.FD, dest_is_input);

	ARM64Reg src_a = fpr.R(inst.FA);
	ARM64Reg src_b = fpr.R(inst.FB);
	ARM64Reg dest = fpr.R(inst.FD);
	ARM64Reg tmp = fpr.GetReg();

	m_float_emit.FSUB(64, tmp, src_a, src_b);
	m_float_emit.INS(64, dest, 0, tmp, 0);

	fpr.Unlock(tmp);
}

View File

@ -39,6 +39,28 @@ void JitArm64::ComputeRC(u32 imm, int crf)
gpr.Unlock(WA);
}
void JitArm64::ComputeCarry(bool Carry)
{
if (Carry)
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, 1);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
return;
}
STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
}
void JitArm64::ComputeCarry()
{
ARM64Reg WA = gpr.GetReg();
CSINC(WA, WSP, WSP, CC_CC);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
}
// Following static functions are used in conjunction with reg_imm
static u32 Add(u32 a, u32 b)
{
@ -245,6 +267,29 @@ void JitArm64::boolX(UGeckoInstruction inst)
}
}
// add[.]: RD = RA + RB. Folds to an immediate when both inputs are known constants.
// Falls back to the interpreter when OE is set.
void JitArm64::addx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, b = inst.RB, d = inst.RD;

	if (gpr.IsImm(a) && gpr.IsImm(b))
	{
		// Add as unsigned: identical 32-bit result, but wraps instead of
		// invoking signed-overflow undefined behaviour.
		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
		gpr.SetImmediate(d, i + j);

		if (inst.Rc)
			ComputeRC(gpr.GetImm(d), 0);
	}
	else
	{
		// Bind the destination the same way mullwx/addcx do; the second argument
		// is only true when RD is also an input.
		gpr.BindToRegister(d, d == a || d == b);
		ADD(gpr.R(d), gpr.R(a), gpr.R(b));

		if (inst.Rc)
			ComputeRC(gpr.R(d), 0);
	}
}
void JitArm64::extsXx(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -415,3 +460,237 @@ void JitArm64::cmpli(UGeckoInstruction inst)
FALLBACK_IF(true);
}
// rlwinm[.]: RA = rotl(RS, SH) & mask(MB, ME).
// Folds to an immediate when RS is a known constant; otherwise emits an AND
// with a ROR-shifted register operand.
void JitArm64::rlwinmx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	u32 keep_mask = Helper_Mask(inst.MB,inst.ME);

	if (gpr.IsImm(inst.RS))
	{
		const u32 rotated = _rotl(gpr.GetImm(inst.RS), inst.SH);
		gpr.SetImmediate(inst.RA, rotated & keep_mask);
		if (inst.Rc)
			ComputeRC(gpr.GetImm(inst.RA), 0);
		return;
	}

	gpr.BindToRegister(inst.RA, inst.RA == inst.RS);

	ARM64Reg mask_reg = gpr.GetReg();
	// A left rotate by SH is expressed as a right rotate by 32 - SH.
	ArithOption rotate(gpr.R(inst.RS), ST_ROR, 32 - inst.SH);
	MOVI2R(mask_reg, keep_mask);
	AND(gpr.R(inst.RA), mask_reg, gpr.R(inst.RS), rotate);
	gpr.Unlock(mask_reg);

	if (inst.Rc)
		ComputeRC(gpr.R(inst.RA), 0);
}
// srawi[.]: RA = RS >> SH (arithmetic). xer_ca is set when RS is negative and
// any non-zero bits were shifted out. CR0 is updated on every path when Rc is set.
void JitArm64::srawix(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	int a = inst.RA;
	int s = inst.RS;
	int amount = inst.SH;

	if (gpr.IsImm(s))
	{
		s32 imm = (s32)gpr.GetImm(s);
		gpr.SetImmediate(a, imm >> amount);

		// Shift as unsigned: left-shifting a negative signed value is undefined behaviour.
		if (amount != 0 && (imm < 0) && ((u32)imm << (32 - amount)))
			ComputeCarry(true);
		else
			ComputeCarry(false);

		// The record form must update CR0 on the constant-folded path as well.
		if (inst.Rc)
			ComputeRC(gpr.GetImm(a), 0);
	}
	else if (amount != 0)
	{
		gpr.BindToRegister(a, a == s);
		ARM64Reg RA = gpr.R(a);
		ARM64Reg RS = gpr.R(s);
		ARM64Reg WA = gpr.GetReg();

		// WA = bits that get shifted out, moved to the top of the word.
		// Computed before RA is written because RA and RS may be the same register.
		ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount));
		ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount));
		if (inst.Rc)
			ComputeRC(RA, 0);

		// Carry = (shifted-out bits & result) != 0; the result's top bits are
		// copies of the sign after the arithmetic shift.
		ANDS(WSP, WA, RA, ArithOption(RA, ST_LSL, 0));
		CSINC(WA, WSP, WSP, CC_EQ);
		STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
		gpr.Unlock(WA);
	}
	else
	{
		gpr.BindToRegister(a, a == s);
		ARM64Reg RA = gpr.R(a);
		ARM64Reg RS = gpr.R(s);
		MOV(RA, RS);
		// Nothing is shifted out, so the carry is cleared.
		STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));

		// The record form must update CR0 on the zero-shift path as well.
		if (inst.Rc)
			ComputeRC(RA, 0);
	}
}
// addic / addic.: RD = RA + SIMM, recording the carry in xer_ca.
// Primary opcode 13 selects the variant that also calls ComputeRC for CR field 0.
void JitArm64::addic(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	const int a = inst.RA;
	const int d = inst.RD;
	const bool record = inst.OPCD == 13;
	const s32 signed_imm = inst.SIMM_16;
	const u32 unsigned_imm = (u32)signed_imm;

	if (!gpr.IsImm(a))
	{
		gpr.BindToRegister(d, d == a);

		if (unsigned_imm < 4096)
		{
			// Fits the immediate form of ADDS directly.
			ADDS(gpr.R(d), gpr.R(a), unsigned_imm);
		}
		else if (signed_imm < 0 && signed_imm > -4096)
		{
			// Small negative immediates are emitted as a subtraction of the magnitude.
			SUBS(gpr.R(d), gpr.R(a), std::abs(signed_imm));
		}
		else
		{
			ARM64Reg imm_reg = gpr.GetReg();
			MOVI2R(imm_reg, unsigned_imm);
			ADDS(gpr.R(d), gpr.R(a), imm_reg);
			gpr.Unlock(imm_reg);
		}

		// Store the host carry flag produced by the ADDS/SUBS above.
		ComputeCarry();

		if (record)
			ComputeRC(gpr.R(d), 0);
		return;
	}

	// RA is a known constant: fold the addition and the carry at compile time.
	const u32 base = gpr.GetImm(a);
	gpr.SetImmediate(d, base + unsigned_imm);

	const bool carry_out = Interpreter::Helper_Carry(base, unsigned_imm);
	ComputeCarry(carry_out);

	if (record)
		ComputeRC(gpr.GetImm(d), 0);
}
// mulli: RD = low 32 bits of RA * SIMM. Folds to an immediate when RA is a known constant.
// The OE check mirrors the other integer handlers.
void JitArm64::mulli(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, d = inst.RD;

	if (gpr.IsImm(a))
	{
		// Multiply as unsigned: the low 32 bits match the signed product, and the
		// wrap-around is well defined instead of being signed-overflow UB.
		u32 i = gpr.GetImm(a);
		gpr.SetImmediate(d, i * (u32)(s32)inst.SIMM_16);
	}
	else
	{
		gpr.BindToRegister(d, d == a);
		ARM64Reg WA = gpr.GetReg();
		// The sign-extended immediate has to be materialised in a register for MUL.
		MOVI2R(WA, (u32)(s32)inst.SIMM_16);
		MUL(gpr.R(d), gpr.R(a), WA);
		gpr.Unlock(WA);
	}
}
// mullw[.]: RD = low 32 bits of RA * RB. Folds to an immediate when both inputs
// are known constants. Falls back to the interpreter when OE is set.
void JitArm64::mullwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, b = inst.RB, d = inst.RD;

	if (gpr.IsImm(a) && gpr.IsImm(b))
	{
		// Multiply as unsigned: the low 32 bits match the signed product, and the
		// wrap-around is well defined instead of being signed-overflow UB.
		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
		gpr.SetImmediate(d, i * j);

		if (inst.Rc)
			ComputeRC(gpr.GetImm(d), 0);
	}
	else
	{
		gpr.BindToRegister(d, d == a || d == b);
		MUL(gpr.R(d), gpr.R(a), gpr.R(b));

		if (inst.Rc)
			ComputeRC(gpr.R(d), 0);
	}
}
// addze[.]: RD = RA + xer_ca, with xer_ca updated to the carry out of that addition.
// Falls back to the interpreter when OE is set.
void JitArm64::addzex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, d = inst.RD;

	gpr.BindToRegister(d, d == a);
	ARM64Reg WA = gpr.GetReg();
	LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));

	// Use a flag-setting add so the host carry flag is the real carry out of
	// RA + CA. A compare of the result against zero would leave the host carry
	// set for every value, making xer_ca unconditionally 1.
	// Assumes xer_ca only ever holds 0 or 1, which is what the stores in this file write.
	ADDS(gpr.R(d), gpr.R(a), WA);
	gpr.Unlock(WA);
	ComputeCarry();

	// Record form: update CR0 from the result, as addic/addcx do.
	if (inst.Rc)
		ComputeRC(gpr.R(d), 0);
}
// subf[.]: RD = RB - RA. Folds to an immediate when both inputs are known constants.
// Falls back to the interpreter when OE is set.
void JitArm64::subfx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, b = inst.RB, d = inst.RD;

	if (gpr.IsImm(a) && gpr.IsImm(b))
	{
		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
		gpr.SetImmediate(d, j - i);

		if (inst.Rc)
			ComputeRC(gpr.GetImm(d), 0);
	}
	else
	{
		// Bind the destination the same way mullwx/addcx do; the second argument
		// is only true when RD is also an input.
		gpr.BindToRegister(d, d == a || d == b);
		SUB(gpr.R(d), gpr.R(b), gpr.R(a));

		if (inst.Rc)
			ComputeRC(gpr.R(d), 0);
	}
}
// addc[.]: RD = RA + RB, recording the carry out in xer_ca.
// Folds both the sum and the carry when the inputs are known constants.
// Falls back to the interpreter when OE is set.
void JitArm64::addcx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	FALLBACK_IF(inst.OE);

	int a = inst.RA, b = inst.RB, d = inst.RD;

	if (gpr.IsImm(a) && gpr.IsImm(b))
	{
		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
		// addc is an addition: the folded result must be the sum, matching the
		// ADDS emitted on the register path and the Helper_Carry(i, j) below.
		gpr.SetImmediate(d, i + j);

		bool has_carry = Interpreter::Helper_Carry(i, j);
		ComputeCarry(has_carry);

		if (inst.Rc)
			ComputeRC(gpr.GetImm(d), 0);
	}
	else
	{
		gpr.BindToRegister(d, d == a || d == b);
		ADDS(gpr.R(d), gpr.R(a), gpr.R(b));

		// Store the host carry flag produced by ADDS.
		ComputeCarry();

		if (inst.Rc)
			ComputeRC(gpr.R(d), 0);
	}
}

View File

@ -41,6 +41,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
off_reg = gpr.R(offsetReg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 ignore_mask(0);
regs_in_use[W0] = 0;
regs_in_use[W30] = 0;
@ -114,25 +115,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(addr), addr_reg);
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
if (update)
MOVI2R(up_reg, imm_addr);
}
else
{
if (update)
MOV(up_reg, addr_reg);
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
dest_reg, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use, ignore_mask);
}
@ -155,6 +155,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
reg_dest = gpr.R(dest);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
regs_in_use[W30] = 0;
@ -237,10 +238,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
RS, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use);
}
@ -321,8 +324,6 @@ void JitArm64::lXX(UGeckoInstruction inst)
break;
}
FALLBACK_IF(update);
SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update);
// LWZ idle skipping

View File

@ -0,0 +1,394 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
// JIT handler shared by the floating-point load opcodes decoded below
// (lfs, lfsu, lfsx, lfsux, lfd, lfdu, lfdx, lfdux).
// Steps: decode the opcode into backpatch flags, compute the effective address
// into W0 (as a compile-time constant when every input is a known immediate),
// write it back to RA for the update forms, then emit the load through
// EmitBackpatchRoutine with FD's host register as the destination.
void JitArm64::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
// -1 means the address uses the 16-bit displacement rather than an index register.
s32 offset_reg = -1;
// Select access size, update mode and index register from the opcode.
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 567: // lfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offset_reg = b;
break;
case 535: // lfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offset_reg = b;
break;
case 599: // lfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offset_reg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 48: // lfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg VD = fpr.R(inst.FD);
// The effective address is always built in W0.
ARM64Reg addr_reg = W0;
gpr.Lock(W0, W30);
fpr.Lock(Q0);
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offset_reg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else
{
if (offset_reg == -1)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
}
}
}
else
{
// Non-update forms: RA == 0 contributes nothing to the address.
if (offset_reg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else if (!a && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offset_reg);
}
else if (a)
{
ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
}
else
{
MOV(addr_reg, gpr.R(offset_reg));
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
// A constant address still has to be materialised in the address register.
if (is_immediate)
MOVI2R(XA, imm_addr);
// Update forms write the effective address back to RA (emitted before the load itself).
if (update)
MOV(gpr.R(a), addr_reg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 fpr_ignore_mask(0);
// W0, W30 and Q0 are locked by this function, so they are dropped from the save sets.
regs_in_use[W0] = 0;
regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0
// VD receives the loaded value; it is handed to ABI_PopRegisters as an ignore mask,
// presumably so the restore does not overwrite it - confirm against the emitter.
fpr_ignore_mask[VD - Q0] = 1;
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
// Constant address inside RAM: emit the fastmem variant without saving registers.
EmitBackpatchRoutine(this, flags, true, false, VD, XA);
}
else
{
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
VD, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W30);
fpr.Unlock(Q0);
}
// Emits code for the floating-point store family handled by this function:
// stfs/stfsu/stfd/stfdu (primary opcodes 52-55) and the indexed forms
// stfsx/stfsux/stfdx/stfdux (opcode 31, SUBOP10 663/695/727/759).
//
// The effective address is folded to a constant when every contributing GPR
// is a known immediate; otherwise it is computed into a scratch register.
// Update forms write the effective address back to RA.
void JitArm64::stfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	u32 a = inst.RA, b = inst.RB;
	s32 offset = inst.SIMM_16;
	u32 flags = BackPatchInfo::FLAG_STORE;
	bool update = false;
	// -1 means "displacement form": the address uses SIMM_16 rather than RB.
	s32 offset_reg = -1;

	switch (inst.OPCD)
	{
	case 31:
		switch (inst.SUBOP10)
		{
		case 663: // stfsx
			flags |= BackPatchInfo::FLAG_SIZE_F32;
			offset_reg = b;
			break;
		case 695: // stfsux
			flags |= BackPatchInfo::FLAG_SIZE_F32;
			// Update form: RA must receive the effective address, exactly as
			// for stfdux/stfsu/stfdu below.
			update = true;
			offset_reg = b;
			break;
		case 727: // stfdx
			flags |= BackPatchInfo::FLAG_SIZE_F64;
			offset_reg = b;
			break;
		case 759: // stfdux
			flags |= BackPatchInfo::FLAG_SIZE_F64;
			update = true;
			offset_reg = b;
			break;
		}
		break;
	case 53: // stfsu
		flags |= BackPatchInfo::FLAG_SIZE_F32;
		update = true;
		break;
	case 52: // stfs
		flags |= BackPatchInfo::FLAG_SIZE_F32;
		break;
	case 55: // stfdu
		flags |= BackPatchInfo::FLAG_SIZE_F64;
		update = true;
		break;
	case 54: // stfd
		flags |= BackPatchInfo::FLAG_SIZE_F64;
		break;
	}

	u32 imm_addr = 0;
	bool is_immediate = false;

	// Host register holding the guest FPR being stored.
	ARM64Reg V0 = fpr.R(inst.FS);

	// The address scratch register depends on the access size.
	ARM64Reg addr_reg;
	if (flags & BackPatchInfo::FLAG_SIZE_F64)
		addr_reg = W0;
	else
		addr_reg = W1;

	gpr.Lock(W0, W1, W30);
	fpr.Lock(Q0);

	if (update)
	{
		// Always uses RA
		if (gpr.IsImm(a) && offset_reg == -1)
		{
			is_immediate = true;
			imm_addr = offset + gpr.GetImm(a);
		}
		else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
		{
			is_immediate = true;
			imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
		}
		else
		{
			if (offset_reg == -1)
			{
				MOVI2R(addr_reg, offset);
				ADD(addr_reg, addr_reg, gpr.R(a));
			}
			else
			{
				ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
			}
		}
	}
	else
	{
		// Non-update forms: an RA field of 0 contributes nothing to the address.
		if (offset_reg == -1)
		{
			if (a && gpr.IsImm(a))
			{
				is_immediate = true;
				imm_addr = gpr.GetImm(a) + offset;
			}
			else if (a)
			{
				MOVI2R(addr_reg, offset);
				ADD(addr_reg, addr_reg, gpr.R(a));
			}
			else
			{
				is_immediate = true;
				imm_addr = offset;
			}
		}
		else
		{
			if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
			{
				is_immediate = true;
				imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
			}
			else if (!a && gpr.IsImm(offset_reg))
			{
				is_immediate = true;
				imm_addr = gpr.GetImm(offset_reg);
			}
			else if (a)
			{
				ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
			}
			else
			{
				MOV(addr_reg, gpr.R(offset_reg));
			}
		}
	}

	ARM64Reg XA = EncodeRegTo64(addr_reg);

	// A constant address still has to be materialized for the store routine.
	if (is_immediate)
		MOVI2R(XA, imm_addr);

	// Write the effective address back to RA for the update forms.
	if (update)
		MOV(gpr.R(a), addr_reg);

	// The scratch registers locked above never need to be preserved.
	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
	regs_in_use[W0] = 0;
	regs_in_use[W1] = 0;
	regs_in_use[W30] = 0;
	fprs_in_use[0] = 0; // Q0

	if (is_immediate)
	{
		if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
		{
			// Constant address in the 0xCC008000 page with gather-pipe
			// optimization enabled: append the value straight to
			// GPFifo::m_gatherPipe and bump m_gatherPipeCount.
			int accessSize;
			if (flags & BackPatchInfo::FLAG_SIZE_F64)
				accessSize = 64;
			else
				accessSize = 32;

			MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
			MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
			LDR(INDEX_UNSIGNED, W0, X30, 0);
			ADD(X1, X1, X0);
			if (accessSize == 64)
			{
				// Byte-reverse the double before storing it.
				m_float_emit.REV64(8, Q0, V0);
				m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
			}
			else if (accessSize == 32)
			{
				// Narrow to single precision, then byte-reverse and store.
				m_float_emit.FCVT(32, 64, Q0, V0);
				m_float_emit.REV32(8, D0, D0);
				m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
			}
			ADD(W0, W0, accessSize >> 3);
			STR(INDEX_UNSIGNED, W0, X30, 0);
			// Accounted for at JIT time so the block can emit the periodic
			// gather-pipe check.
			jit->js.fifoBytesThisBlock += accessSize >> 3;
		}
		else if (Memory::IsRAMAddress(imm_addr))
		{
			EmitBackpatchRoutine(this, flags, true, false, V0, XA);
		}
		else
		{
			ABI_PushRegisters(regs_in_use);
			m_float_emit.ABI_PushRegisters(fprs_in_use);
			EmitBackpatchRoutine(this, flags, false, false, V0, XA);
			m_float_emit.ABI_PopRegisters(fprs_in_use);
			ABI_PopRegisters(regs_in_use);
		}
	}
	else
	{
		// Has a chance of being backpatched which will destroy our state
		// push and pop everything in this instance
		ABI_PushRegisters(regs_in_use);
		m_float_emit.ABI_PushRegisters(fprs_in_use);
		EmitBackpatchRoutine(this, flags,
			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
			V0, XA);
		m_float_emit.ABI_PopRegisters(fprs_in_use);
		ABI_PopRegisters(regs_in_use);
	}

	gpr.Unlock(W0, W1, W30);
	fpr.Unlock(Q0);
}

View File

@ -0,0 +1,495 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
// ps_abs: emits a vector FABS from the host register of FB into that of FD.
void JitArm64::ps_abs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 src = inst.FB;
	const u32 dst = inst.FD;

	// Old destination contents are only loaded when FD is also the source.
	const bool load_dst = (dst == src);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg src_reg = fpr.R(src);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FABS(64, dst_reg, src_reg);
}
// ps_add: emits a vector FADD of the host registers of FA and FB into FD's.
void JitArm64::ps_add(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 lhs = inst.FA;
	const u32 rhs = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == lhs) || (dst == rhs);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg lhs_reg = fpr.R(lhs);
	const ARM64Reg rhs_reg = fpr.R(rhs);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FADD(64, dst_reg, lhs_reg, rhs_reg);
}
// ps_div: emits a vector FDIV (FA / FB) into the host register of FD.
void JitArm64::ps_div(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 numer = inst.FA;
	const u32 denom = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == numer) || (dst == denom);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg numer_reg = fpr.R(numer);
	const ARM64Reg denom_reg = fpr.R(denom);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FDIV(64, dst_reg, numer_reg, denom_reg);
}
// ps_madd: emits FA * FC into a temporary, then adds FB into FD's register.
void JitArm64::ps_madd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	// Temporary for the product; released once the sum has been emitted.
	ARM64Reg product = fpr.GetReg();
	m_float_emit.FMUL(64, product, reg_a, reg_c);
	m_float_emit.FADD(64, reg_d, product, reg_b);
	fpr.Unlock(product);
}
// ps_madds0: duplicates element 0 of FC into a temporary, multiplies it by FA
// and adds FB, writing the result to FD's host register.
void JitArm64::ps_madds0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg scratch = fpr.GetReg();
	m_float_emit.DUP(64, scratch, reg_c, 0);
	m_float_emit.FMUL(64, scratch, scratch, reg_a);
	m_float_emit.FADD(64, reg_d, scratch, reg_b);
	fpr.Unlock(scratch);
}
// ps_madds1: duplicates element 1 of FC into a temporary, multiplies it by FA
// and adds FB, writing the result to FD's host register.
void JitArm64::ps_madds1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg scratch = fpr.GetReg();
	m_float_emit.DUP(64, scratch, reg_c, 1);
	m_float_emit.FMUL(64, scratch, scratch, reg_a);
	m_float_emit.FADD(64, reg_d, scratch, reg_b);
	fpr.Unlock(scratch);
}
// ps_merge00: emits a single TRN1 on the host registers of FA and FB into FD's.
void JitArm64::ps_merge00(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 first = inst.FA;
	const u32 second = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == first) || (dst == second);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg first_reg = fpr.R(first);
	const ARM64Reg second_reg = fpr.R(second);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.TRN1(64, dst_reg, first_reg, second_reg);
}
// ps_merge01: copies element 0 from FA and element 1 from FB into FD's host
// register using two INS instructions.
void JitArm64::ps_merge01(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 first = inst.FA;
	const u32 second = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == first) || (dst == second);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg first_reg = fpr.R(first);
	const ARM64Reg second_reg = fpr.R(second);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.INS(64, dst_reg, 0, first_reg, 0);
	m_float_emit.INS(64, dst_reg, 1, second_reg, 1);
}
// ps_merge10: builds a register from element 1 of FA and element 0 of FB.
// When FD aliases an operand the result is assembled in a temporary first so
// that neither source is overwritten before it has been read.
void JitArm64::ps_merge10(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 first = inst.FA;
	const u32 second = inst.FB;
	const u32 dst = inst.FD;

	const bool dst_aliases_source = (dst == first) || (dst == second);
	fpr.BindToRegister(dst, dst_aliases_source);

	const ARM64Reg first_reg = fpr.R(first);
	const ARM64Reg second_reg = fpr.R(second);
	const ARM64Reg dst_reg = fpr.R(dst);

	if (dst_aliases_source)
	{
		// Assemble in a scratch register, then copy it over with ORR.
		ARM64Reg scratch = fpr.GetReg();
		m_float_emit.INS(64, scratch, 0, first_reg, 1);
		m_float_emit.INS(64, scratch, 1, second_reg, 0);
		m_float_emit.ORR(dst_reg, scratch, scratch);
		fpr.Unlock(scratch);
	}
	else
	{
		m_float_emit.INS(64, dst_reg, 0, first_reg, 1);
		m_float_emit.INS(64, dst_reg, 1, second_reg, 0);
	}
}
// ps_merge11: emits a single TRN2 on the host registers of FA and FB into FD's.
void JitArm64::ps_merge11(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 first = inst.FA;
	const u32 second = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == first) || (dst == second);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg first_reg = fpr.R(first);
	const ARM64Reg second_reg = fpr.R(second);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.TRN2(64, dst_reg, first_reg, second_reg);
}
// ps_mr: register move, emitted as ORR of the source with itself.
// Nothing is emitted when source and destination are the same guest register.
void JitArm64::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 src = inst.FB;
	const u32 dst = inst.FD;

	if (dst != src)
	{
		// The destination is fully overwritten, so its old value is not loaded.
		fpr.BindToRegister(dst, false);

		const ARM64Reg src_reg = fpr.R(src);
		const ARM64Reg dst_reg = fpr.R(dst);

		m_float_emit.ORR(dst_reg, src_reg, src_reg);
	}
}
// ps_mul: emits a vector FMUL of the host registers of FA and FC into FD's.
void JitArm64::ps_mul(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 lhs = inst.FA;
	const u32 rhs = inst.FC;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == lhs) || (dst == rhs);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg lhs_reg = fpr.R(lhs);
	const ARM64Reg rhs_reg = fpr.R(rhs);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FMUL(64, dst_reg, lhs_reg, rhs_reg);
}
// ps_muls0: duplicates element 0 of FC into a temporary and multiplies FA by
// it, writing the result to FD's host register.
void JitArm64::ps_muls0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 lhs = inst.FA;
	const u32 scalar = inst.FC;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == lhs) || (dst == scalar);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg lhs_reg = fpr.R(lhs);
	const ARM64Reg scalar_reg = fpr.R(scalar);
	const ARM64Reg dst_reg = fpr.R(dst);

	ARM64Reg splat = fpr.GetReg();
	m_float_emit.DUP(64, splat, scalar_reg, 0);
	m_float_emit.FMUL(64, dst_reg, lhs_reg, splat);
	fpr.Unlock(splat);
}
// ps_muls1: duplicates element 1 of FC into a temporary and multiplies FA by
// it, writing the result to FD's host register.
void JitArm64::ps_muls1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 lhs = inst.FA;
	const u32 scalar = inst.FC;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == lhs) || (dst == scalar);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg lhs_reg = fpr.R(lhs);
	const ARM64Reg scalar_reg = fpr.R(scalar);
	const ARM64Reg dst_reg = fpr.R(dst);

	ARM64Reg splat = fpr.GetReg();
	m_float_emit.DUP(64, splat, scalar_reg, 1);
	m_float_emit.FMUL(64, dst_reg, lhs_reg, splat);
	fpr.Unlock(splat);
}
// ps_msub: emits FA * FC into a temporary, then subtracts FB into FD's register.
void JitArm64::ps_msub(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	// Temporary for the product; released once the difference has been emitted.
	ARM64Reg product = fpr.GetReg();
	m_float_emit.FMUL(64, product, reg_a, reg_c);
	m_float_emit.FSUB(64, reg_d, product, reg_b);
	fpr.Unlock(product);
}
// ps_nabs: emits FABS of FB into FD's host register, then negates it in place.
void JitArm64::ps_nabs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 src = inst.FB;
	const u32 dst = inst.FD;

	// Old destination contents are only loaded when FD is also the source.
	const bool load_dst = (dst == src);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg src_reg = fpr.R(src);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FABS(64, dst_reg, src_reg);
	m_float_emit.FNEG(64, dst_reg, dst_reg);
}
// ps_neg: emits a vector FNEG from the host register of FB into that of FD.
void JitArm64::ps_neg(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 src = inst.FB;
	const u32 dst = inst.FD;

	// Old destination contents are only loaded when FD is also the source.
	const bool load_dst = (dst == src);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg src_reg = fpr.R(src);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FNEG(64, dst_reg, src_reg);
}
// ps_nmadd: emits FA * FC + FB into FD's host register, then negates it.
void JitArm64::ps_nmadd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg product = fpr.GetReg();
	m_float_emit.FMUL(64, product, reg_a, reg_c);
	m_float_emit.FADD(64, reg_d, product, reg_b);
	m_float_emit.FNEG(64, reg_d, reg_d);
	fpr.Unlock(product);
}
// ps_nmsub: emits FA * FC - FB into FD's host register, then negates it.
void JitArm64::ps_nmsub(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg product = fpr.GetReg();
	m_float_emit.FMUL(64, product, reg_a, reg_c);
	m_float_emit.FSUB(64, reg_d, product, reg_b);
	m_float_emit.FNEG(64, reg_d, reg_d);
	fpr.Unlock(product);
}
// ps_res: emits an estimate instruction from the host register of FB into FD's.
//
// NOTE(review): the instruction emitted here is FRSQRTE, AArch64's reciprocal
// *square-root* estimate, whereas ps_res is the paired-single reciprocal
// estimate. FRECPE looks like the intended instruction - confirm that the
// float emitter provides it before switching.
void JitArm64::ps_res(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 src = inst.FB;
	const u32 dst = inst.FD;

	// Old destination contents are only loaded when FD is also the source.
	const bool load_dst = (dst == src);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg src_reg = fpr.R(src);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FRSQRTE(64, dst_reg, src_reg);
}
// ps_sel: emits FCMGE on FA to build a selection mask, then BSL to pick
// between FC and FB with it. When FD aliases an operand the mask and the
// selection are done in a temporary so no source is clobbered early.
void JitArm64::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	const bool dst_aliases_source = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, dst_aliases_source);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	if (dst_aliases_source)
	{
		ARM64Reg selector = fpr.GetReg();
		m_float_emit.FCMGE(64, selector, reg_a);
		m_float_emit.BSL(selector, reg_c, reg_b);
		m_float_emit.ORR(reg_d, selector, selector);
		fpr.Unlock(selector);
	}
	else
	{
		// The destination can hold the mask directly.
		m_float_emit.FCMGE(64, reg_d, reg_a);
		m_float_emit.BSL(reg_d, reg_c, reg_b);
	}
}
// ps_sub: emits a vector FSUB (FA - FB) into the host register of FD.
void JitArm64::ps_sub(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 lhs = inst.FA;
	const u32 rhs = inst.FB;
	const u32 dst = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (dst == lhs) || (dst == rhs);
	fpr.BindToRegister(dst, load_dst);

	const ARM64Reg lhs_reg = fpr.R(lhs);
	const ARM64Reg rhs_reg = fpr.R(rhs);
	const ARM64Reg dst_reg = fpr.R(dst);

	m_float_emit.FSUB(64, dst_reg, lhs_reg, rhs_reg);
}
// ps_sum0: element 0 of the result is FA + (element 1 of FB); element 1 of
// the result comes from element 1 of FC.
void JitArm64::ps_sum0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg scratch = fpr.GetReg();
	m_float_emit.DUP(64, scratch, reg_b, 1);
	if (d == c)
	{
		// FD already holds FC: only element 0 has to be replaced.
		m_float_emit.FADD(64, scratch, scratch, reg_a);
		m_float_emit.INS(64, reg_d, 0, scratch, 0);
	}
	else
	{
		// Sum into FD, then bring element 1 over from FC.
		m_float_emit.FADD(64, reg_d, scratch, reg_a);
		m_float_emit.INS(64, reg_d, 1, reg_c, 1);
	}
	fpr.Unlock(scratch);
}
// ps_sum1: element 1 of the result is (element 0 of FA) + FB; element 0 of
// the result comes from element 0 of FC.
void JitArm64::ps_sum1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	// The record (Rc) form is routed through FALLBACK_IF.
	FALLBACK_IF(inst.Rc);

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	// Load the destination only when it aliases one of the operands.
	const bool load_dst = (d == a) || (d == b) || (d == c);
	fpr.BindToRegister(d, load_dst);

	const ARM64Reg reg_a = fpr.R(a);
	const ARM64Reg reg_b = fpr.R(b);
	const ARM64Reg reg_c = fpr.R(c);
	const ARM64Reg reg_d = fpr.R(d);

	ARM64Reg scratch = fpr.GetReg();
	m_float_emit.DUP(64, scratch, reg_a, 0);
	if (d == c)
	{
		// FD already holds FC: only element 1 has to be replaced.
		m_float_emit.FADD(64, scratch, scratch, reg_b);
		m_float_emit.INS(64, reg_d, 1, scratch, 1);
	}
	else
	{
		// Sum into FD, then bring element 0 over from FC.
		m_float_emit.FADD(64, reg_d, scratch, reg_b);
		m_float_emit.INS(64, reg_d, 0, reg_c, 0);
	}
	fpr.Unlock(scratch);
}

View File

@ -10,6 +10,7 @@ using namespace Arm64Gen;
// Attaches the cache to a code emitter, creates the float emitter that wraps
// it, and builds the host register allocation order.
void Arm64RegCache::Init(ARM64XEmitter *emitter)
{
	m_emit = emitter;

	// The float emitter is owned by this cache and layered on the same emitter.
	m_float_emit.reset(new ARM64FloatEmitter(emitter));

	GetAllocationOrder();
}
@ -56,6 +57,23 @@ void Arm64RegCache::UnlockRegister(ARM64Reg host_reg)
reg->Unlock();
}
void Arm64RegCache::FlushMostStaleRegister()
{
u32 most_stale_preg = 0;
u32 most_stale_amount = 0;
for (u32 i = 0; i < 32; ++i)
{
u32 last_used = m_guest_registers[i].GetLastUsed();
if (last_used > most_stale_amount &&
m_guest_registers[i].GetType() == REG_REG)
{
most_stale_preg = i;
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg, false);
}
// GPR Cache
void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
@ -212,23 +230,6 @@ void Arm64GPRCache::GetAllocationOrder()
m_host_registers.push_back(HostReg(reg));
}
void Arm64GPRCache::FlushMostStaleRegister()
{
u32 most_stale_preg = 0;
u32 most_stale_amount = 0;
for (u32 i = 0; i < 32; ++i)
{
u32 last_used = m_guest_registers[i].GetLastUsed();
if (last_used > most_stale_amount &&
m_guest_registers[i].GetType() == REG_REG)
{
most_stale_preg = i;
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg, false);
}
BitSet32 Arm64GPRCache::GetCallerSavedUsed()
{
BitSet32 registers(0);
@ -254,35 +255,120 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
// FPR Cache
// Writes cached guest FPRs back to the PPC state.
//
// In FLUSH_INTERPRETER mode a register that `op` neither reads nor writes is
// only flushed when its host register is not callee saved; in every other
// mode all host-resident registers are flushed. FLUSH_MAINTAIN_STATE stores
// the value but keeps the host register binding.
void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{
	for (int preg = 0; preg < 32; ++preg)
	{
		bool must_flush = true;
		if (mode == FLUSH_INTERPRETER)
		{
			// This interpreted instruction doesn't use this register
			if (!(op->regsOut[preg] || op->regsIn[preg]))
				must_flush = false;
		}

		if (m_guest_registers[preg].GetType() != REG_REG)
			continue;

		// Has to be flushed if it isn't in a callee saved register
		ARM64Reg host = m_guest_registers[preg].GetReg();
		if (must_flush || !IsCalleeSaved(host))
			FlushRegister(preg, mode == FLUSH_MAINTAIN_STATE);
	}
}
// Returns the host register holding guest FPR `preg`, loading the 128-bit
// value from the PPC state first if the register is not resident yet.
// Also ages every guest register and marks `preg` as freshly used.
ARM64Reg Arm64FPRCache::R(u32 preg)
{
	OpArg& guest = m_guest_registers[preg];
	IncrementAllUsed();
	guest.ResetLastUsed();

	const auto kind = guest.GetType();

	// already in a reg
	if (kind == REG_REG)
		return guest.GetReg();

	// Register isn't loaded at /all/
	if (kind == REG_NOTLOADED)
	{
		ARM64Reg host = GetReg();
		guest.LoadToReg(host);
		m_float_emit->LDR(128, INDEX_UNSIGNED, host, X29, PPCSTATE_OFF(ps[preg][0]));
		return host;
	}

	_dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!");

	// We've got an issue if we end up here
	return INVALID_REG;
}
// Makes sure guest FPR `preg` has a host register assigned. The current value
// is only loaded from the PPC state when `do_load` is set; callers that
// overwrite the whole register can skip the load.
void Arm64FPRCache::BindToRegister(u32 preg, bool do_load)
{
	OpArg& guest = m_guest_registers[preg];

	// Nothing to do unless the register has yet to be loaded.
	if (guest.GetType() != REG_NOTLOADED)
		return;

	ARM64Reg host = GetReg();
	guest.LoadToReg(host);

	if (do_load)
		m_float_emit->LDR(128, INDEX_UNSIGNED, host, X29, PPCSTATE_OFF(ps[preg][0]));
}
void Arm64FPRCache::GetAllocationOrder()
{
const std::vector<ARM64Reg> allocation_order =
{
D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
D11, D12, D13, D14, D15, D16, D17, D18, D19,
D20, D21, D22, D23, D24, D25, D26, D27, D28,
D29, D30, D31,
// Callee saved
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
// Caller saved
Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,
Q7, Q6, Q5, Q4, Q3, Q2, Q1, Q0
};
for (ARM64Reg reg : allocation_order)
m_host_registers.push_back(HostReg(reg));
}
// Placeholder: intentionally emits nothing and flushes no register.
void Arm64FPRCache::FlushMostStaleRegister()
{
// XXX: Flush a register
}
// Placeholder: not implemented yet, so `host_reg` is ignored and nothing is
// flushed.
void Arm64FPRCache::FlushByHost(ARM64Reg host_reg)
{
// XXX: Scan guest registers and flush if found
}
// Reports whether `reg`, after passing through EncodeRegTo64, matches one of
// the entries in the callee-saved list (Q8-Q15).
//
// NOTE(review): EncodeRegTo64 is applied to a vector register here; if that
// helper only remaps general-purpose register encodings, the result may never
// match a Q register and this would always return false - confirm.
bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg)
{
	static std::vector<ARM64Reg> preserved_regs =
	{
		Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, INVALID_REG,
	};

	const ARM64Reg needle = EncodeRegTo64(reg);
	for (const ARM64Reg candidate : preserved_regs)
	{
		if (candidate == needle)
			return true;
	}
	return false;
}
// Stores guest FPR `preg` back to the PPC state if it is held in a host
// register. Unless `maintain_state` is set, the host register is released
// and the guest register is marked as flushed afterwards.
void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{
	OpArg& guest = m_guest_registers[preg];

	// Nothing cached in a host register, nothing to write back.
	if (guest.GetType() != REG_REG)
		return;

	ARM64Reg host = guest.GetReg();
	m_float_emit->STR(128, INDEX_UNSIGNED, host, X29, PPCSTATE_OFF(ps[preg][0]));

	if (maintain_state)
		return;

	UnlockRegister(host);
	guest.Flush();
}
// Returns a bitmask of the host vector registers that are currently locked.
// Bit N corresponds to register Q<N>, matching how callers index the mask
// (e.g. bit 0 for Q0, or `reg - Q0` for an arbitrary register).
BitSet32 Arm64FPRCache::GetCallerSavedUsed()
{
	BitSet32 registers(0);
	for (auto& it : m_host_registers)
	{
		// Index relative to Q0; the reversed subtraction would produce a
		// negative (out-of-range) bit index for every register above Q0.
		if (it.IsLocked())
			registers[it.GetReg() - Q0] = 1;
	}
	return registers;
}

View File

@ -119,7 +119,7 @@ private:
class Arm64RegCache
{
public:
Arm64RegCache() : m_emit(nullptr), m_reg_stats(nullptr) {};
Arm64RegCache() : m_emit(nullptr), m_float_emit(nullptr), m_reg_stats(nullptr) {};
virtual ~Arm64RegCache() {};
void Init(ARM64XEmitter *emitter);
@ -133,10 +133,14 @@ public:
// Will dump an immediate to the host register as well
virtual ARM64Reg R(u32 reg) = 0;
virtual BitSet32 GetCallerSavedUsed() = 0;
// Returns a temporary register for use
// Requires unlocking after done
ARM64Reg GetReg();
void StoreRegister(u32 preg) { FlushRegister(preg, false); }
// Locks a register so a cache cannot use it
// Useful for function calls
template<typename T = ARM64Reg, typename... Args>
@ -166,7 +170,7 @@ protected:
virtual void GetAllocationOrder() = 0;
// Flushes the most stale register
virtual void FlushMostStaleRegister() = 0;
void FlushMostStaleRegister();
// Lock a register
void LockRegister(ARM64Reg host_reg);
@ -177,15 +181,31 @@ protected:
// Flushes a guest register by host provided
virtual void FlushByHost(ARM64Reg host_reg) = 0;
virtual void FlushRegister(u32 preg, bool maintain_state) = 0;
// Get available host registers
u32 GetUnlockedRegisterCount();
void IncrementAllUsed()
{
for (auto& reg : m_guest_registers)
reg.IncrementLastUsed();
}
// Code emitter
ARM64XEmitter *m_emit;
// Float emitter
std::unique_ptr<ARM64FloatEmitter> m_float_emit;
// Host side registers that hold the host registers in order of use
std::vector<HostReg> m_host_registers;
// Our guest GPRs
// PowerPC has 32 GPRs
// PowerPC also has 32 paired FPRs
OpArg m_guest_registers[32];
// Register stats for the current block
PPCAnalyst::BlockRegStats *m_reg_stats;
};
@ -215,34 +235,20 @@ public:
void BindToRegister(u32 preg, bool do_load);
void StoreRegister(u32 preg) { FlushRegister(preg, false); }
BitSet32 GetCallerSavedUsed();
BitSet32 GetCallerSavedUsed() override;
protected:
// Get the order of the host registers
void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override;
// Our guest GPRs
// PowerPC has 32 GPRs
OpArg m_guest_registers[32];
void FlushRegister(u32 preg, bool maintain_state) override;
private:
bool IsCalleeSaved(ARM64Reg reg);
void IncrementAllUsed()
{
for (auto& reg : m_guest_registers)
reg.IncrementLastUsed();
}
void FlushRegister(u32 preg, bool maintain_state);
};
class Arm64FPRCache : public Arm64RegCache
@ -256,17 +262,19 @@ public:
// Will dump an immediate to the host register as well
ARM64Reg R(u32 preg);
void BindToRegister(u32 preg, bool do_load);
BitSet32 GetCallerSavedUsed() override;
protected:
// Get the order of the host registers
void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override;
// Our guest FPRs
// Gekko has 32 paired registers(32x2)
OpArg m_guest_registers[32][2];
void FlushRegister(u32 preg, bool maintain_state) override;
private:
bool IsCalleeSaved(ARM64Reg reg);
};

View File

@ -196,3 +196,90 @@ void JitArm64::twx(UGeckoInstruction inst)
WriteExit(js.compilerPC + 4);
}
}
// mfspr: loads a special-purpose register from the PPC state into RD.
// XER, WPAR, DEC, TL and TU are routed through FALLBACK_IF; every other SPR
// is read directly from the spr array at index * 4.
void JitArm64::mfspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	// The SPR number is split across two instruction fields.
	const u32 spr_index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);

	switch (spr_index)
	{
	case SPR_XER:
	case SPR_WPAR:
	case SPR_DEC:
	case SPR_TL:
	case SPR_TU:
		FALLBACK_IF(true);
	default:
	{
		// RD is fully overwritten, so its old value is not loaded.
		gpr.BindToRegister(inst.RD, false);
		ARM64Reg dest = gpr.R(inst.RD);
		LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(spr) + spr_index * 4);
		break;
	}
	}
}
// mftb: handled by forwarding the instruction unchanged to mfspr().
void JitArm64::mftb(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
mfspr(inst);
}
// mtspr: stores RD into a special-purpose register in the PPC state.
// SPRs on the "safe" list are written with a plain store at the end of the
// function; XER and anything not listed are routed through FALLBACK_IF.
void JitArm64::mtspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	// The SPR number is split across two instruction fields.
	const u32 spr_index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);

	switch (spr_index)
	{
	case SPR_DMAU:
	case SPR_SPRG0:
	case SPR_SPRG1:
	case SPR_SPRG2:
	case SPR_SPRG3:
	case SPR_SRR0:
	case SPR_SRR1:
	case SPR_LR:
	case SPR_CTR:
	case SPR_GQR0:
	case SPR_GQR0 + 1:
	case SPR_GQR0 + 2:
	case SPR_GQR0 + 3:
	case SPR_GQR0 + 4:
	case SPR_GQR0 + 5:
	case SPR_GQR0 + 6:
	case SPR_GQR0 + 7:
		// These are safe to do the easy way, see the bottom of this function.
		break;
	case SPR_XER:
	{
		FALLBACK_IF(true);
		// The split-XER store sequence below sits behind the unconditional
		// FALLBACK_IF above.
		ARM64Reg src = gpr.R(inst.RD);
		ARM64Reg scratch = gpr.GetReg();
		ARM64Reg keep_bits = gpr.GetReg();
		MOVI2R(keep_bits, 0xFF7F);
		AND(scratch, src, keep_bits, ArithOption(keep_bits, ST_LSL, 0));
		STRH(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(xer_stringctrl));
		UBFM(scratch, src, XER_CA_SHIFT, XER_CA_SHIFT);
		STRB(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(xer_ca));
		UBFM(scratch, src, XER_OV_SHIFT, 31); // Same as scratch = src >> XER_OV_SHIFT
		STRB(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(xer_so_ov));
		gpr.Unlock(scratch, keep_bits);
	}
	break;
	default:
		FALLBACK_IF(true);
	}

	// OK, this is easy.
	ARM64Reg value = gpr.R(inst.RD);
	STR(INDEX_UNSIGNED, value, X29, PPCSTATE_OFF(spr) + spr_index * 4);
}

View File

@ -45,17 +45,17 @@ static GekkoOPTemplate primarytable[] =
{3, &JitArm64::twx}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}},
{7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{7, &JitArm64::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, &JitArm64::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &JitArm64::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{12, &JitArm64::addic}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &JitArm64::addic}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{14, &JitArm64::arith_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{15, &JitArm64::arith_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}},
{21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{21, &JitArm64::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}},
{24, &JitArm64::arith_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
@ -84,15 +84,15 @@ static GekkoOPTemplate primarytable[] =
{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{48, &JitArm64::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm64::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm64::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm64::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm64::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm64::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
@ -114,39 +114,39 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{40, &JitArm64::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm64::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm64::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm64::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm64::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm64::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm64::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
};
// Opcode-4 secondary table: maps each paired-single arithmetic sub-opcode to
// its JIT handler. Every sub-opcode is listed exactly once; ps_rsqrte is the
// only entry still handled by the interpreter fallback.
static GekkoOPTemplate table4_2[] =
{
	{10, &JitArm64::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
	{11, &JitArm64::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
	{12, &JitArm64::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
	{13, &JitArm64::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
	{14, &JitArm64::ps_madds0}, //"ps_madds0", OPTYPE_PS, 0}},
	{15, &JitArm64::ps_madds1}, //"ps_madds1", OPTYPE_PS, 0}},
	{18, &JitArm64::ps_div}, //"ps_div", OPTYPE_PS, 0, 16}},
	{20, &JitArm64::ps_sub}, //"ps_sub", OPTYPE_PS, 0}},
	{21, &JitArm64::ps_add}, //"ps_add", OPTYPE_PS, 0}},
	{23, &JitArm64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
	{24, &JitArm64::ps_res}, //"ps_res", OPTYPE_PS, 0}},
	{25, &JitArm64::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
	{26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
	{28, &JitArm64::ps_msub}, //"ps_msub", OPTYPE_PS, 0}},
	{29, &JitArm64::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
	{30, &JitArm64::ps_nmsub}, //"ps_nmsub", OPTYPE_PS, 0}},
	{31, &JitArm64::ps_nmadd}, //"ps_nmadd", OPTYPE_PS, 0}},
};
@ -196,7 +196,7 @@ static GekkoOPTemplate table31[] =
{954, &JitArm64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{824, &JitArm64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
@ -208,24 +208,24 @@ static GekkoOPTemplate table31[] =
{1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
//load word
{23, &JitArm64::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &JitArm64::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
{23, &JitArm64::lXX}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &JitArm64::lXX}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword
{279, &JitArm64::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &JitArm64::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
{279, &JitArm64::lXX}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &JitArm64::lXX}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword signextend
{343, &JitArm64::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &JitArm64::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
{343, &JitArm64::lXX}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &JitArm64::lXX}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte
{87, &JitArm64::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &JitArm64::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
{87, &JitArm64::lXX}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &JitArm64::lXX}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse
{534, &JitArm64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &JitArm64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{534, &JitArm64::lXX}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &JitArm64::lXX}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
// Conditional load/store (Wii SMP)
{150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}},
@ -236,16 +236,16 @@ static GekkoOPTemplate table31[] =
{597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
//store word
{151, &JitArm64::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, &JitArm64::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{151, &JitArm64::stX}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, &JitArm64::stX}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store halfword
{407, &JitArm64::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, &JitArm64::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{407, &JitArm64::stX}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, &JitArm64::stX}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store byte
{215, &JitArm64::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, &JitArm64::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{215, &JitArm64::stX}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, &JitArm64::stX}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store bytereverse
{662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
@ -255,15 +255,15 @@ static GekkoOPTemplate table31[] =
{725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
// fp load/store
{535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{535, &JitArm64::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm64::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm64::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm64::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{663, &JitArm64::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm64::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm64::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm64::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
@ -272,9 +272,9 @@ static GekkoOPTemplate table31[] =
{146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{210, &JitArm64::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}},
{242, &JitArm64::mtsrin}, //"mtsrin", OPTYPE_SYSTEM, 0}},
{339, &JitArm64::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &JitArm64::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &JitArm64::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{339, &JitArm64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &JitArm64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &JitArm64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{595, &JitArm64::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &JitArm64::mfsrin}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
@ -294,25 +294,25 @@ static GekkoOPTemplate table31[] =
// Handler table "table31_2": every entry pairs a sub-opcode number with the
// JitArm64 member function used for that instruction. The trailing comment on
// each entry records the instruction mnemonic followed by its type and flags.
// Entries bound to FallBackToInterpreter presumably have no dedicated JitArm64
// handler yet.
// NOTE(review): sub-opcodes 266, 778, 10, 522, 202, 235, 747, 40 and 552 are
// listed twice below (once with FallBackToInterpreter, once with a dedicated
// handler). This looks like the old and new lines of a change shown together;
// confirm which entry is meant to remain.
static GekkoOPTemplate table31_2[] =
{
{266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &JitArm64::FallBackToInterpreter}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{266, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &JitArm64::addx}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &JitArm64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &JitArm64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &JitArm64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{235, &JitArm64::mullwx}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{747, &JitArm64::mullwx}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{104, &JitArm64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &JitArm64::FallBackToInterpreter}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &JitArm64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &JitArm64::subfx}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
@ -323,27 +323,27 @@ static GekkoOPTemplate table31_2[] =
// Handler table "table59": every entry pairs a sub-opcode number with the
// JitArm64 member function used for that instruction. The trailing comment on
// each entry records the instruction mnemonic followed by its type and flags;
// all mnemonics listed here are single-precision ("...sx") floating-point ops.
// Entries bound to FallBackToInterpreter presumably have no dedicated JitArm64
// handler yet.
// NOTE(review): sub-opcodes 20, 21, 25, 28, 29, 30 and 31 are listed twice
// below (once with FallBackToInterpreter, once with a dedicated handler). This
// looks like the old and new lines of a change shown together; confirm which
// entry is meant to remain.
static GekkoOPTemplate table59[] =
{
{18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{20, &JitArm64::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
// Sub-opcode 22 (fsqrtsx) is intentionally left commented out in this table.
// {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
{24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::fmsubsx}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::fnmsubsx}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
};
// Handler table "table63": every entry pairs a sub-opcode number with the
// JitArm64 member function used for that instruction. The trailing comment on
// each entry records the instruction mnemonic followed by its type and flags.
// Entries bound to FallBackToInterpreter presumably have no dedicated JitArm64
// handler yet.
// NOTE(review): sub-opcodes 264, 72, 136 and 40 are listed twice below (once
// with FallBackToInterpreter, once with a dedicated handler). This looks like
// the old and new lines of a change shown together; confirm which entry is
// meant to remain.
static GekkoOPTemplate table63[] =
{
{264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{264, &JitArm64::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm64::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm64::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
@ -357,16 +357,16 @@ static GekkoOPTemplate table63[] =
// Handler table "table63_2": every entry pairs a sub-opcode number with the
// JitArm64 member function used for that instruction. The trailing comment on
// each entry records the instruction mnemonic followed by its type and flags.
// Entries bound to FallBackToInterpreter presumably have no dedicated JitArm64
// handler yet.
// NOTE(review): sub-opcodes 20, 21, 23, 25, 28, 29, 30 and 31 are listed twice
// below (once with FallBackToInterpreter, once with a dedicated handler). This
// looks like the old and new lines of a change shown together; confirm which
// entry is meant to remain.
static GekkoOPTemplate table63_2[] =
{
{18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{20, &JitArm64::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArm64::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::fmsubx}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::fnmsubx}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
};