Merge pull request #1852 from Sonicadvance1/AArch64_Recompiler_Improvements

Implements a large portion of the recompiler for AArch64
This commit is contained in:
Ryan Houdek 2015-01-11 07:08:16 -06:00
commit 83a415198d
13 changed files with 2147 additions and 157 deletions

View File

@ -225,8 +225,11 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_RegCache.cpp PowerPC/JitArm64/JitArm64_RegCache.cpp
PowerPC/JitArm64/JitArm64_BackPatch.cpp PowerPC/JitArm64/JitArm64_BackPatch.cpp
PowerPC/JitArm64/JitArm64_Branch.cpp PowerPC/JitArm64/JitArm64_Branch.cpp
PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp PowerPC/JitArm64/JitArm64_LoadStore.cpp
PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
PowerPC/JitArm64/JitArm64_Paired.cpp
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
PowerPC/JitArm64/JitArm64_Tables.cpp) PowerPC/JitArm64/JitArm64_Tables.cpp)
endif() endif()

View File

@ -16,6 +16,7 @@ void JitArm64::Init()
{ {
AllocCodeSpace(CODE_SIZE); AllocCodeSpace(CODE_SIZE);
jo.enableBlocklink = true; jo.enableBlocklink = true;
jo.optimizeGatherPipe = true;
gpr.Init(this); gpr.Init(this);
fpr.Init(this); fpr.Init(this);
@ -179,6 +180,14 @@ void JitArm64::WriteExitDestInR(ARM64Reg Reg)
BR(EncodeRegTo64(Reg)); BR(EncodeRegTo64(Reg));
} }
void JitArm64::DumpCode(const u8* start, const u8* end)
{
std::string output = "";
for (u8* code = (u8*)start; code < end; code += 4)
output += StringFromFormat("%08x", Common::swap32(*(u32*)code));
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
}
void JitArm64::Run() void JitArm64::Run()
{ {
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
@ -281,6 +290,21 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.next_compilerPC = ops[i + 1].address; js.next_compilerPC = ops[i + 1].address;
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
gpr.Lock(W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
regs_in_use[W30] = 0;
ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe);
BLR(X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(W30);
}
if (!ops[i].skip) if (!ops[i].skip)
{ {
if (js.memcheck && (opinfo->flags & FL_USE_FPU)) if (js.memcheck && (opinfo->flags & FL_USE_FPU))
@ -294,6 +318,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
// If we have a register that will never be used again, flush it. // If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse) for (int j : ~ops[i].gprInUse)
gpr.StoreRegister(j); gpr.StoreRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreRegister(j);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
{ {

View File

@ -21,11 +21,13 @@
// Some asserts to make sure we will be able to load everything // Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock
{ {
public: public:
JitArm64() : code_buffer(32000) {} JitArm64() : code_buffer(32000), m_float_emit(this) {}
~JitArm64() {} ~JitArm64() {}
void Init(); void Init();
@ -80,6 +82,7 @@ public:
// Integer // Integer
void arith_imm(UGeckoInstruction inst); void arith_imm(UGeckoInstruction inst);
void boolX(UGeckoInstruction inst); void boolX(UGeckoInstruction inst);
void addx(UGeckoInstruction inst);
void extsXx(UGeckoInstruction inst); void extsXx(UGeckoInstruction inst);
void cntlzwx(UGeckoInstruction inst); void cntlzwx(UGeckoInstruction inst);
void negx(UGeckoInstruction inst); void negx(UGeckoInstruction inst);
@ -87,6 +90,14 @@ public:
void cmpl(UGeckoInstruction inst); void cmpl(UGeckoInstruction inst);
void cmpi(UGeckoInstruction inst); void cmpi(UGeckoInstruction inst);
void cmpli(UGeckoInstruction inst); void cmpli(UGeckoInstruction inst);
void rlwinmx(UGeckoInstruction inst);
void srawix(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst);
void addic(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst);
// System Registers // System Registers
void mtmsr(UGeckoInstruction inst); void mtmsr(UGeckoInstruction inst);
@ -97,12 +108,66 @@ public:
void mfsrin(UGeckoInstruction inst); void mfsrin(UGeckoInstruction inst);
void mtsrin(UGeckoInstruction inst); void mtsrin(UGeckoInstruction inst);
void twx(UGeckoInstruction inst); void twx(UGeckoInstruction inst);
void mfspr(UGeckoInstruction inst);
void mftb(UGeckoInstruction inst);
void mtspr(UGeckoInstruction inst);
// LoadStore // LoadStore
void icbi(UGeckoInstruction inst); void icbi(UGeckoInstruction inst);
void lXX(UGeckoInstruction inst); void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst); void stX(UGeckoInstruction inst);
// LoadStore floating point
void lfXX(UGeckoInstruction inst);
void stfXX(UGeckoInstruction inst);
// Floating point
void fabsx(UGeckoInstruction inst);
void faddsx(UGeckoInstruction inst);
void faddx(UGeckoInstruction inst);
void fmaddsx(UGeckoInstruction inst);
void fmaddx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);
void fmsubsx(UGeckoInstruction inst);
void fmsubx(UGeckoInstruction inst);
void fmulsx(UGeckoInstruction inst);
void fmulx(UGeckoInstruction inst);
void fnabsx(UGeckoInstruction inst);
void fnegx(UGeckoInstruction inst);
void fnmaddsx(UGeckoInstruction inst);
void fnmaddx(UGeckoInstruction inst);
void fnmsubsx(UGeckoInstruction inst);
void fnmsubx(UGeckoInstruction inst);
void fselx(UGeckoInstruction inst);
void fsubsx(UGeckoInstruction inst);
void fsubx(UGeckoInstruction inst);
// Paired
void ps_abs(UGeckoInstruction inst);
void ps_add(UGeckoInstruction inst);
void ps_div(UGeckoInstruction inst);
void ps_madd(UGeckoInstruction inst);
void ps_madds0(UGeckoInstruction inst);
void ps_madds1(UGeckoInstruction inst);
void ps_merge00(UGeckoInstruction inst);
void ps_merge01(UGeckoInstruction inst);
void ps_merge10(UGeckoInstruction inst);
void ps_merge11(UGeckoInstruction inst);
void ps_mr(UGeckoInstruction inst);
void ps_msub(UGeckoInstruction inst);
void ps_mul(UGeckoInstruction inst);
void ps_muls0(UGeckoInstruction inst);
void ps_muls1(UGeckoInstruction inst);
void ps_nabs(UGeckoInstruction inst);
void ps_nmadd(UGeckoInstruction inst);
void ps_nmsub(UGeckoInstruction inst);
void ps_neg(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst);
void ps_sub(UGeckoInstruction inst);
void ps_sum0(UGeckoInstruction inst);
void ps_sum1(UGeckoInstruction inst);
private: private:
Arm64GPRCache gpr; Arm64GPRCache gpr;
Arm64FPRCache fpr; Arm64FPRCache fpr;
@ -112,6 +177,11 @@ private:
PPCAnalyst::CodeBuffer code_buffer; PPCAnalyst::CodeBuffer code_buffer;
ARM64FloatEmitter m_float_emit;
// Dump a memory range of code
void DumpCode(const u8* start, const u8* end);
// The key is the backpatch flags // The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info; std::map<u32, BackPatchInfo> m_backpatch_info;
@ -137,6 +207,8 @@ private:
void ComputeRC(Arm64Gen::ARM64Reg reg, int crf = 0); void ComputeRC(Arm64Gen::ARM64Reg reg, int crf = 0);
void ComputeRC(u32 imm, int crf = 0); void ComputeRC(u32 imm, int crf = 0);
void ComputeCarry(bool Carry);
void ComputeCarry();
typedef u32 (*Operation)(u32, u32); typedef u32 (*Operation)(u32, u32);
void reg_imm(u32 d, u32 a, bool binary, u32 value, Operation do_op, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, ArithOption), bool Rc = false); void reg_imm(u32 d, u32 a, bool binary, u32 value, Operation do_op, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, ArithOption), bool Rc = false);

View File

@ -29,7 +29,8 @@ static void DoBacktrace(uintptr_t access_address, SContext* ctx)
for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16) for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16)
{ {
pc_memory += StringFromFormat("%08x%08x%08x%08x", pc_memory += StringFromFormat("%08x%08x%08x%08x",
*(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); Common::swap32(*(u32*)pc), Common::swap32(*(u32*)(pc + 4)),
Common::swap32(*(u32*)(pc + 8)), Common::swap32(*(u32*)(pc + 12)));
ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x", ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x",
pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12));
@ -51,10 +52,34 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
*flags |= BackPatchInfo::FLAG_SIZE_8; *flags |= BackPatchInfo::FLAG_SIZE_8;
else if (size == 1) // 16-bit else if (size == 1) // 16-bit
*flags |= BackPatchInfo::FLAG_SIZE_16; *flags |= BackPatchInfo::FLAG_SIZE_16;
else // 32-bit else if (size == 2) // 32-bit
*flags |= BackPatchInfo::FLAG_SIZE_32; *flags |= BackPatchInfo::FLAG_SIZE_32;
else if (size == 3) // 64-bit
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (op == 0xE5) // Load if (op == 0xF5) // NEON LDR
{
if (size == 2) // 32-bit float
{
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
*flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F);
return true;
}
else if (op == 0xF4) // NEON STR
{
if (size == 2) // 32-bit float
{
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
*flags |= BackPatchInfo::FLAG_STORE;
*reg = (ARM64Reg)(inst & 0x1F);
return true;
}
else if (op == 0xE5) // Load
{ {
*flags |= BackPatchInfo::FLAG_LOAD; *flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F); *reg = (ARM64Reg)(inst & 0x1F);
@ -90,10 +115,38 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
if (flags & BackPatchInfo::FLAG_STORE && if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{ {
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.FCVT(32, 64, Q0, RS);
float_emit.REV32(8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
}
else
{
float_emit.REV64(8, Q0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
}
} }
else if (flags & BackPatchInfo::FLAG_LOAD && else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{ {
ARM64FloatEmitter float_emit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.LD1R(32, RS, addr);
float_emit.REV64(8, RS, RS);
float_emit.FCVTL(64, RS, RS);
}
else
{
float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
float_emit.REV64(8, Q0, Q0);
float_emit.INS(64, RS, 0, Q0, 0);
}
} }
else if (flags & BackPatchInfo::FLAG_STORE) else if (flags & BackPatchInfo::FLAG_STORE)
{ {
@ -143,10 +196,39 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
if (flags & BackPatchInfo::FLAG_STORE && if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{ {
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
float_emit.FCVT(32, 64, Q0, RS);
float_emit.FMOV(32, false, W0, Q0);
emit->MOVI2R(X30, (u64)&Memory::Write_U32);
emit->BLR(X30);
}
else
{
emit->MOVI2R(X30, (u64)&Memory::Write_F64);
float_emit.DUP(64, Q0, RS);
emit->BLR(X30);
}
} }
else if (flags & BackPatchInfo::FLAG_LOAD && else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{ {
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOVI2R(X30, (u64)&Memory::Read_U32);
emit->BLR(X30);
float_emit.DUP(32, RS, X0);
float_emit.FCVTL(64, RS, RS);
}
else
{
emit->MOVI2R(X30, (u64)&Memory::Read_F64);
emit->BLR(X30);
float_emit.INS(64, RS, 0, X0);
}
} }
else if (flags & BackPatchInfo::FLAG_STORE) else if (flags & BackPatchInfo::FLAG_STORE)
{ {
@ -245,7 +327,8 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
ctx->CTX_PC = new_pc; ctx->CTX_PC = new_pc;
// Wipe the top bits of the addr_register // Wipe the top bits of the addr_register
if (flags & BackPatchInfo::FLAG_STORE) if (flags & BackPatchInfo::FLAG_STORE &&
!(flags & BackPatchInfo::FLAG_SIZE_F64))
ctx->CTX_REG(1) &= 0xFFFFFFFFUll; ctx->CTX_REG(1) &= 0xFFFFFFFFUll;
else else
ctx->CTX_REG(0) &= 0xFFFFFFFFUll; ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
@ -382,6 +465,46 @@ void JitArm64::InitBackpatch()
SetCodePtr(code_base); SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info; m_backpatch_info[flags] = info;
} }
} }
@ -446,6 +569,46 @@ void JitArm64::InitBackpatch()
SetCodePtr(code_base); SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info; m_backpatch_info[flags] = info;
} }
} }

View File

@ -0,0 +1,376 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::fabsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(64, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::faddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FADD(64, VD, VA, VB);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::faddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FADD(64, V0, VA, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmaddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmaddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.INS(64, VD, 0, VB, 0);
}
void JitArm64::fmsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fmulsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FMUL(64, VD, VA, VC);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fmulx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnabsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(64, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnegx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FB);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FNEG(64, V0, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmaddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmaddx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.DUP(64, VD, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fnmsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, V0, V0, VB);
m_float_emit.FNEG(64, V0, V0);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fselx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD,
inst.FD == inst.FA ||
inst.FD == inst.FB ||
inst.FD == inst.FC);
ARM64Reg V0 = fpr.GetReg();
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VC = gpr.R(inst.FC);
m_float_emit.FCMPE(VA);
m_float_emit.FCSEL(V0, VC, VB, CC_GE);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}
void JitArm64::fsubsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
m_float_emit.FSUB(64, VD, VA, VB);
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fsubx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FSUB(64, V0, VA, VB);
m_float_emit.INS(64, VD, 0, V0, 0);
fpr.Unlock(V0);
}

View File

@ -39,6 +39,28 @@ void JitArm64::ComputeRC(u32 imm, int crf)
gpr.Unlock(WA); gpr.Unlock(WA);
} }
void JitArm64::ComputeCarry(bool Carry)
{
if (Carry)
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, 1);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
return;
}
STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
}
void JitArm64::ComputeCarry()
{
ARM64Reg WA = gpr.GetReg();
CSINC(WA, WSP, WSP, CC_CC);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
}
// Following static functions are used in conjunction with reg_imm // Following static functions are used in conjunction with reg_imm
static u32 Add(u32 a, u32 b) static u32 Add(u32 a, u32 b)
{ {
@ -245,6 +267,29 @@ void JitArm64::boolX(UGeckoInstruction inst)
} }
} }
void JitArm64::addx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, i + j);
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
ADD(gpr.R(d), gpr.R(a), gpr.R(b));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::extsXx(UGeckoInstruction inst) void JitArm64::extsXx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -415,3 +460,237 @@ void JitArm64::cmpli(UGeckoInstruction inst)
FALLBACK_IF(true); FALLBACK_IF(true);
} }
void JitArm64::rlwinmx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
u32 mask = Helper_Mask(inst.MB,inst.ME);
if (gpr.IsImm(inst.RS))
{
gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask);
if (inst.Rc)
ComputeRC(gpr.GetImm(inst.RA), 0);
return;
}
gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
ARM64Reg WA = gpr.GetReg();
ArithOption Shift(gpr.R(inst.RS), ST_ROR, 32 - inst.SH);
MOVI2R(WA, mask);
AND(gpr.R(inst.RA), WA, gpr.R(inst.RS), Shift);
gpr.Unlock(WA);
if (inst.Rc)
ComputeRC(gpr.R(inst.RA), 0);
}
void JitArm64::srawix(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA;
int s = inst.RS;
int amount = inst.SH;
if (gpr.IsImm(s))
{
s32 imm = (s32)gpr.GetImm(s);
gpr.SetImmediate(a, imm >> amount);
if (amount != 0 && (imm < 0) && (imm << (32 - amount)))
ComputeCarry(true);
else
ComputeCarry(false);
}
else if (amount != 0)
{
gpr.BindToRegister(a, a == s);
ARM64Reg RA = gpr.R(a);
ARM64Reg RS = gpr.R(s);
ARM64Reg WA = gpr.GetReg();
ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount));
ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount));
if (inst.Rc)
ComputeRC(RA, 0);
ANDS(WSP, WA, RA, ArithOption(RA, ST_LSL, 0));
CSINC(WA, WSP, WSP, CC_EQ);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
}
else
{
gpr.BindToRegister(a, a == s);
ARM64Reg RA = gpr.R(a);
ARM64Reg RS = gpr.R(s);
MOV(RA, RS);
STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
}
}
void JitArm64::addic(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
bool rc = inst.OPCD == 13;
s32 simm = inst.SIMM_16;
u32 imm = (u32)simm;
if (gpr.IsImm(a))
{
u32 i = gpr.GetImm(a);
gpr.SetImmediate(d, i + imm);
bool has_carry = Interpreter::Helper_Carry(i, imm);
ComputeCarry(has_carry);
if (rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
gpr.BindToRegister(d, d == a);
if (imm < 4096)
{
ADDS(gpr.R(d), gpr.R(a), imm);
}
else if (simm > -4096 && simm < 0)
{
SUBS(gpr.R(d), gpr.R(a), std::abs(simm));
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, imm);
ADDS(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}
ComputeCarry();
if (rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::mulli(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, d = inst.RD;
if (gpr.IsImm(a))
{
s32 i = (s32)gpr.GetImm(a);
gpr.SetImmediate(d, i * inst.SIMM_16);
}
else
{
gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, (u32)(s32)inst.SIMM_16);
MUL(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}
}
void JitArm64::mullwx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, i * j);
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
gpr.BindToRegister(d, d == a || d == b);
MUL(gpr.R(d), gpr.R(a), gpr.R(b));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::addzex(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, d = inst.RD;
gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
CMP(WA, 1);
CSINC(gpr.R(d), gpr.R(a), gpr.R(a), CC_NEQ);
CMP(gpr.R(d), 0);
gpr.Unlock(WA);
ComputeCarry();
}
void JitArm64::subfx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, j - i);
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
SUB(gpr.R(d), gpr.R(b), gpr.R(a));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::addcx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, i * j);
bool has_carry = Interpreter::Helper_Carry(i, j);
ComputeCarry(has_carry);
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
gpr.BindToRegister(d, d == a || d == b);
ADDS(gpr.R(d), gpr.R(a), gpr.R(b));
ComputeCarry();
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}

View File

@ -41,6 +41,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
off_reg = gpr.R(offsetReg); off_reg = gpr.R(offsetReg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 ignore_mask(0); BitSet32 ignore_mask(0);
regs_in_use[W0] = 0; regs_in_use[W0] = 0;
regs_in_use[W30] = 0; regs_in_use[W30] = 0;
@ -114,25 +115,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate) if (is_immediate)
MOVI2R(XA, imm_addr); MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(addr), addr_reg);
if (is_immediate && Memory::IsRAMAddress(imm_addr)) if (is_immediate && Memory::IsRAMAddress(imm_addr))
{ {
EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA); EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
if (update)
MOVI2R(up_reg, imm_addr);
} }
else else
{ {
if (update)
MOV(up_reg, addr_reg);
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
dest_reg, XA); dest_reg, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use, ignore_mask); ABI_PopRegisters(regs_in_use, ignore_mask);
} }
@ -155,6 +155,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
reg_dest = gpr.R(dest); reg_dest = gpr.R(dest);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0; regs_in_use[W0] = 0;
regs_in_use[W1] = 0; regs_in_use[W1] = 0;
regs_in_use[W30] = 0; regs_in_use[W30] = 0;
@ -237,10 +238,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
RS, XA); RS, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }
@ -321,8 +324,6 @@ void JitArm64::lXX(UGeckoInstruction inst)
break; break;
} }
FALLBACK_IF(update);
SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update);
// LWZ idle skipping // LWZ idle skipping

View File

@ -0,0 +1,394 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
s32 offset_reg = -1;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 567: // lfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offset_reg = b;
break;
case 535: // lfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offset_reg = b;
break;
case 599: // lfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offset_reg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 48: // lfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg addr_reg = W0;
gpr.Lock(W0, W30);
fpr.Lock(Q0);
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offset_reg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else
{
if (offset_reg == -1)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
}
}
}
else
{
if (offset_reg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else if (!a && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offset_reg);
}
else if (a)
{
ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
}
else
{
MOV(addr_reg, gpr.R(offset_reg));
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(a), addr_reg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 fpr_ignore_mask(0);
regs_in_use[W0] = 0;
regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0
fpr_ignore_mask[VD - Q0] = 1;
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, VD, XA);
}
else
{
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
VD, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W30);
fpr.Unlock(Q0);
}
void JitArm64::stfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_STORE;
bool update = false;
s32 offset_reg = -1;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 663: // stfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 695: // stfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
offset_reg = b;
break;
case 727: // stfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offset_reg = b;
break;
case 759: // stfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offset_reg = b;
break;
}
break;
case 53: // stfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 52: // stfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 55: // stfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 54: // stfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg V0 = fpr.R(inst.FS);
ARM64Reg addr_reg;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
addr_reg = W0;
else
addr_reg = W1;
gpr.Lock(W0, W1, W30);
fpr.Lock(Q0);
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offset_reg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else
{
if (offset_reg == -1)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
}
}
}
else
{
if (offset_reg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
}
else if (!a && gpr.IsImm(offset_reg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offset_reg);
}
else if (a)
{
ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
}
else
{
MOV(addr_reg, gpr.R(offset_reg));
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
MOV(gpr.R(a), addr_reg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0
if (is_immediate)
{
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
accessSize = 64;
else
accessSize = 32;
MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
LDR(INDEX_UNSIGNED, W0, X30, 0);
ADD(X1, X1, X0);
if (accessSize == 64)
{
m_float_emit.REV64(8, Q0, V0);
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
}
else if (accessSize == 32)
{
m_float_emit.FCVT(32, 64, Q0, V0);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
}
ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, 0);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, V0, XA);
}
else
{
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags, false, false, V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use);
}
}
else
{
// Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W1, W30);
fpr.Unlock(Q0);
}

View File

@ -0,0 +1,495 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Common/StringUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
void JitArm64::ps_abs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(64, VD, VB);
}
void JitArm64::ps_add(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FADD(64, VD, VA, VB);
}
void JitArm64::ps_div(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FDIV(64, VD, VA, VB);
}
void JitArm64::ps_madd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_madds0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
m_float_emit.FMUL(64, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_madds1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
m_float_emit.FMUL(64, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_merge00(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.TRN1(64, VD, VA, VB);
}
void JitArm64::ps_merge01(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.INS(64, VD, 0, VA, 0);
m_float_emit.INS(64, VD, 1, VB, 1);
}
void JitArm64::ps_merge10(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
if (d != a && d != b)
{
m_float_emit.INS(64, VD, 0, VA, 1);
m_float_emit.INS(64, VD, 1, VB, 0);
}
else
{
ARM64Reg V0 = fpr.GetReg();
m_float_emit.INS(64, V0, 0, VA, 1);
m_float_emit.INS(64, V0, 1, VB, 0);
m_float_emit.ORR(VD, V0, V0);
fpr.Unlock(V0);
}
}
void JitArm64::ps_merge11(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.TRN2(64, VD, VA, VB);
}
void JitArm64::ps_mr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
if (d == b)
return;
fpr.BindToRegister(d, false);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.ORR(VD, VB, VB);
}
void JitArm64::ps_mul(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMUL(64, VD, VA, VC);
}
void JitArm64::ps_muls0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
m_float_emit.FMUL(64, VD, VA, V0);
fpr.Unlock(V0);
}
void JitArm64::ps_muls1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
m_float_emit.FMUL(64, VD, VA, V0);
fpr.Unlock(V0);
}
void JitArm64::ps_msub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
fpr.Unlock(V0);
}
void JitArm64::ps_nabs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(64, VD, VB);
m_float_emit.FNEG(64, VD, VD);
}
void JitArm64::ps_neg(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FNEG(64, VD, VB);
}
void JitArm64::ps_nmadd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.Unlock(V0);
}
void JitArm64::ps_nmsub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.Unlock(V0);
}
void JitArm64::ps_res(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FRSQRTE(64, VD, VB);
}
void JitArm64::ps_sel(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
if (d != a && d != b && d != c)
{
m_float_emit.FCMGE(64, VD, VA);
m_float_emit.BSL(VD, VC, VB);
}
else
{
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FCMGE(64, V0, VA);
m_float_emit.BSL(V0, VC, VB);
m_float_emit.ORR(VD, V0, V0);
fpr.Unlock(V0);
}
}
void JitArm64::ps_sub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FSUB(64, VD, VA, VB);
}
void JitArm64::ps_sum0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VB, 1);
if (d != c)
{
m_float_emit.FADD(64, VD, V0, VA);
m_float_emit.INS(64, VD, 1, VC, 1);
}
else
{
m_float_emit.FADD(64, V0, V0, VA);
m_float_emit.INS(64, VD, 0, V0, 0);
}
fpr.Unlock(V0);
}
void JitArm64::ps_sum1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VA, 0);
if (d != c)
{
m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.INS(64, VD, 0, VC, 0);
}
else
{
m_float_emit.FADD(64, V0, V0, VB);
m_float_emit.INS(64, VD, 1, V0, 1);
}
fpr.Unlock(V0);
}

View File

@ -10,6 +10,7 @@ using namespace Arm64Gen;
void Arm64RegCache::Init(ARM64XEmitter *emitter) void Arm64RegCache::Init(ARM64XEmitter *emitter)
{ {
m_emit = emitter; m_emit = emitter;
m_float_emit.reset(new ARM64FloatEmitter(m_emit));
GetAllocationOrder(); GetAllocationOrder();
} }
@ -56,6 +57,23 @@ void Arm64RegCache::UnlockRegister(ARM64Reg host_reg)
reg->Unlock(); reg->Unlock();
} }
void Arm64RegCache::FlushMostStaleRegister()
{
u32 most_stale_preg = 0;
u32 most_stale_amount = 0;
for (u32 i = 0; i < 32; ++i)
{
u32 last_used = m_guest_registers[i].GetLastUsed();
if (last_used > most_stale_amount &&
m_guest_registers[i].GetType() == REG_REG)
{
most_stale_preg = i;
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg, false);
}
// GPR Cache // GPR Cache
void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats) void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
@ -212,23 +230,6 @@ void Arm64GPRCache::GetAllocationOrder()
m_host_registers.push_back(HostReg(reg)); m_host_registers.push_back(HostReg(reg));
} }
void Arm64GPRCache::FlushMostStaleRegister()
{
u32 most_stale_preg = 0;
u32 most_stale_amount = 0;
for (u32 i = 0; i < 32; ++i)
{
u32 last_used = m_guest_registers[i].GetLastUsed();
if (last_used > most_stale_amount &&
m_guest_registers[i].GetType() == REG_REG)
{
most_stale_preg = i;
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg, false);
}
BitSet32 Arm64GPRCache::GetCallerSavedUsed() BitSet32 Arm64GPRCache::GetCallerSavedUsed()
{ {
BitSet32 registers(0); BitSet32 registers(0);
@ -254,35 +255,120 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
// FPR Cache // FPR Cache
void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{ {
// XXX: Flush our stuff for (int i = 0; i < 32; ++i)
{
bool flush = true;
if (mode == FLUSH_INTERPRETER)
{
if (!(op->regsOut[i] || op->regsIn[i]))
{
// This interpreted instruction doesn't use this register
flush = false;
}
}
if (m_guest_registers[i].GetType() == REG_REG)
{
// Has to be flushed if it isn't in a callee saved register
ARM64Reg host_reg = m_guest_registers[i].GetReg();
if (flush || !IsCalleeSaved(host_reg))
FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
}
}
} }
ARM64Reg Arm64FPRCache::R(u32 preg) ARM64Reg Arm64FPRCache::R(u32 preg)
{ {
// XXX: return a host reg holding a guest register OpArg& reg = m_guest_registers[preg];
IncrementAllUsed();
reg.ResetLastUsed();
switch (reg.GetType())
{
case REG_REG: // already in a reg
return reg.GetReg();
break;
case REG_NOTLOADED: // Register isn't loaded at /all/
{
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
return host_reg;
}
break;
default:
_dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!");
break;
}
// We've got an issue if we end up here
return INVALID_REG;
}
void Arm64FPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
if (do_load)
m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
}
} }
void Arm64FPRCache::GetAllocationOrder() void Arm64FPRCache::GetAllocationOrder()
{ {
const std::vector<ARM64Reg> allocation_order = const std::vector<ARM64Reg> allocation_order =
{ {
D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, // Callee saved
D11, D12, D13, D14, D15, D16, D17, D18, D19, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
D20, D21, D22, D23, D24, D25, D26, D27, D28,
D29, D30, D31, // Caller saved
Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,
Q7, Q6, Q5, Q4, Q3, Q2, Q1, Q0
}; };
for (ARM64Reg reg : allocation_order) for (ARM64Reg reg : allocation_order)
m_host_registers.push_back(HostReg(reg)); m_host_registers.push_back(HostReg(reg));
} }
void Arm64FPRCache::FlushMostStaleRegister()
{
// XXX: Flush a register
}
void Arm64FPRCache::FlushByHost(ARM64Reg host_reg) void Arm64FPRCache::FlushByHost(ARM64Reg host_reg)
{ {
// XXX: Scan guest registers and flush if found // XXX: Scan guest registers and flush if found
} }
bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg)
{
static std::vector<ARM64Reg> callee_regs =
{
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, INVALID_REG,
};
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
}
void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{
OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_REG)
{
ARM64Reg host_reg = reg.GetReg();
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state)
{
UnlockRegister(host_reg);
reg.Flush();
}
}
}
BitSet32 Arm64FPRCache::GetCallerSavedUsed()
{
BitSet32 registers(0);
for (auto& it : m_host_registers)
if (it.IsLocked())
registers[Q0 - it.GetReg()] = 1;
return registers;
}

View File

@ -119,7 +119,7 @@ private:
class Arm64RegCache class Arm64RegCache
{ {
public: public:
Arm64RegCache() : m_emit(nullptr), m_reg_stats(nullptr) {}; Arm64RegCache() : m_emit(nullptr), m_float_emit(nullptr), m_reg_stats(nullptr) {};
virtual ~Arm64RegCache() {}; virtual ~Arm64RegCache() {};
void Init(ARM64XEmitter *emitter); void Init(ARM64XEmitter *emitter);
@ -133,10 +133,14 @@ public:
// Will dump an immediate to the host register as well // Will dump an immediate to the host register as well
virtual ARM64Reg R(u32 reg) = 0; virtual ARM64Reg R(u32 reg) = 0;
virtual BitSet32 GetCallerSavedUsed() = 0;
// Returns a temporary register for use // Returns a temporary register for use
// Requires unlocking after done // Requires unlocking after done
ARM64Reg GetReg(); ARM64Reg GetReg();
void StoreRegister(u32 preg) { FlushRegister(preg, false); }
// Locks a register so a cache cannot use it // Locks a register so a cache cannot use it
// Useful for function calls // Useful for function calls
template<typename T = ARM64Reg, typename... Args> template<typename T = ARM64Reg, typename... Args>
@ -166,7 +170,7 @@ protected:
virtual void GetAllocationOrder() = 0; virtual void GetAllocationOrder() = 0;
// Flushes the most stale register // Flushes the most stale register
virtual void FlushMostStaleRegister() = 0; void FlushMostStaleRegister();
// Lock a register // Lock a register
void LockRegister(ARM64Reg host_reg); void LockRegister(ARM64Reg host_reg);
@ -177,15 +181,31 @@ protected:
// Flushes a guest register by host provided // Flushes a guest register by host provided
virtual void FlushByHost(ARM64Reg host_reg) = 0; virtual void FlushByHost(ARM64Reg host_reg) = 0;
virtual void FlushRegister(u32 preg, bool maintain_state) = 0;
// Get available host registers // Get available host registers
u32 GetUnlockedRegisterCount(); u32 GetUnlockedRegisterCount();
void IncrementAllUsed()
{
for (auto& reg : m_guest_registers)
reg.IncrementLastUsed();
}
// Code emitter // Code emitter
ARM64XEmitter *m_emit; ARM64XEmitter *m_emit;
// Float emitter
std::unique_ptr<ARM64FloatEmitter> m_float_emit;
// Host side registers that hold the host registers in order of use // Host side registers that hold the host registers in order of use
std::vector<HostReg> m_host_registers; std::vector<HostReg> m_host_registers;
// Our guest GPRs
// PowerPC has 32 GPRs
// PowerPC also has 32 paired FPRs
OpArg m_guest_registers[32];
// Register stats for the current block // Register stats for the current block
PPCAnalyst::BlockRegStats *m_reg_stats; PPCAnalyst::BlockRegStats *m_reg_stats;
}; };
@ -215,34 +235,20 @@ public:
void BindToRegister(u32 preg, bool do_load); void BindToRegister(u32 preg, bool do_load);
void StoreRegister(u32 preg) { FlushRegister(preg, false); } BitSet32 GetCallerSavedUsed() override;
BitSet32 GetCallerSavedUsed();
protected: protected:
// Get the order of the host registers // Get the order of the host registers
void GetAllocationOrder(); void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided // Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override; void FlushByHost(ARM64Reg host_reg) override;
// Our guest GPRs void FlushRegister(u32 preg, bool maintain_state) override;
// PowerPC has 32 GPRs
OpArg m_guest_registers[32];
private: private:
bool IsCalleeSaved(ARM64Reg reg); bool IsCalleeSaved(ARM64Reg reg);
void IncrementAllUsed()
{
for (auto& reg : m_guest_registers)
reg.IncrementLastUsed();
}
void FlushRegister(u32 preg, bool maintain_state);
}; };
class Arm64FPRCache : public Arm64RegCache class Arm64FPRCache : public Arm64RegCache
@ -256,17 +262,19 @@ public:
// Will dump an immediate to the host register as well // Will dump an immediate to the host register as well
ARM64Reg R(u32 preg); ARM64Reg R(u32 preg);
void BindToRegister(u32 preg, bool do_load);
BitSet32 GetCallerSavedUsed() override;
protected: protected:
// Get the order of the host registers // Get the order of the host registers
void GetAllocationOrder(); void GetAllocationOrder();
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided // Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override; void FlushByHost(ARM64Reg host_reg) override;
// Our guest FPRs void FlushRegister(u32 preg, bool maintain_state) override;
// Gekko has 32 paired registers(32x2)
OpArg m_guest_registers[32][2]; private:
bool IsCalleeSaved(ARM64Reg reg);
}; };

View File

@ -196,3 +196,90 @@ void JitArm64::twx(UGeckoInstruction inst)
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
} }
} }
void JitArm64::mfspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
case SPR_XER:
case SPR_WPAR:
case SPR_DEC:
case SPR_TL:
case SPR_TU:
FALLBACK_IF(true);
default:
gpr.BindToRegister(inst.RD, false);
ARM64Reg RD = gpr.R(inst.RD);
LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
break;
}
}
void JitArm64::mftb(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
mfspr(inst);
}
void JitArm64::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
case SPR_DMAU:
case SPR_SPRG0:
case SPR_SPRG1:
case SPR_SPRG2:
case SPR_SPRG3:
case SPR_SRR0:
case SPR_SRR1:
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_LR:
case SPR_CTR:
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_XER:
{
FALLBACK_IF(true);
ARM64Reg RD = gpr.R(inst.RD);
ARM64Reg WA = gpr.GetReg();
ARM64Reg mask = gpr.GetReg();
MOVI2R(mask, 0xFF7F);
AND(WA, RD, mask, ArithOption(mask, ST_LSL, 0));
STRH(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_stringctrl));
UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
UBFM(WA, RD, XER_OV_SHIFT, 31); // Same as WA = RD >> XER_OV_SHIFT
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(WA, mask);
}
break;
default:
FALLBACK_IF(true);
}
// OK, this is easy.
ARM64Reg RD = gpr.R(inst.RD);
STR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
}

View File

@ -45,17 +45,17 @@ static GekkoOPTemplate primarytable[] =
{3, &JitArm64::twx}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {3, &JitArm64::twx}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, {17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}},
{7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, {7, &JitArm64::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, &JitArm64::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {10, &JitArm64::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &JitArm64::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, &JitArm64::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {12, &JitArm64::addic}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, {13, &JitArm64::addic}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{14, &JitArm64::arith_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {14, &JitArm64::arith_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{15, &JitArm64::arith_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {15, &JitArm64::arith_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, {20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}},
{21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {21, &JitArm64::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, {23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}},
{24, &JitArm64::arith_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, {24, &JitArm64::arith_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
@ -84,15 +84,15 @@ static GekkoOPTemplate primarytable[] =
{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, {48, &JitArm64::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, {49, &JitArm64::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, {50, &JitArm64::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, {51, &JitArm64::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, {52, &JitArm64::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {53, &JitArm64::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, {54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, {56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, {57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
@ -114,39 +114,39 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10 { //SUBOP10
{0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, {0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {40, &JitArm64::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {136, &JitArm64::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitArm64::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {72, &JitArm64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {528, &JitArm64::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {560, &JitArm64::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, {592, &JitArm64::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, {624, &JitArm64::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, {1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
}; };
static GekkoOPTemplate table4_2[] = static GekkoOPTemplate table4_2[] =
{ {
{10, &JitArm64::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, {10, &JitArm64::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArm64::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, {11, &JitArm64::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm64::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, {12, &JitArm64::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm64::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, {13, &JitArm64::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArm64::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, {14, &JitArm64::ps_madds0}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArm64::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, {15, &JitArm64::ps_madds1}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArm64::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, {18, &JitArm64::ps_div}, //"ps_div", OPTYPE_PS, 0, 16}},
{20, &JitArm64::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, {20, &JitArm64::ps_sub}, //"ps_sub", OPTYPE_PS, 0}},
{21, &JitArm64::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, {21, &JitArm64::ps_add}, //"ps_add", OPTYPE_PS, 0}},
{23, &JitArm64::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, {23, &JitArm64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
{24, &JitArm64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, {24, &JitArm64::ps_res}, //"ps_res", OPTYPE_PS, 0}},
{25, &JitArm64::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, {25, &JitArm64::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
{26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, {26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &JitArm64::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, {28, &JitArm64::ps_msub}, //"ps_msub", OPTYPE_PS, 0}},
{29, &JitArm64::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, {29, &JitArm64::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
{30, &JitArm64::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, {30, &JitArm64::ps_nmsub}, //"ps_nmsub", OPTYPE_PS, 0}},
{31, &JitArm64::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, {31, &JitArm64::ps_nmadd}, //"ps_nmadd", OPTYPE_PS, 0}},
}; };
@ -196,7 +196,7 @@ static GekkoOPTemplate table31[] =
{954, &JitArm64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, &JitArm64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {824, &JitArm64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, {54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
@ -208,24 +208,24 @@ static GekkoOPTemplate table31[] =
{1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, {1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
//load word //load word
{23, &JitArm64::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {23, &JitArm64::lXX}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &JitArm64::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {55, &JitArm64::lXX}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword //load halfword
{279, &JitArm64::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {279, &JitArm64::lXX}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &JitArm64::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {311, &JitArm64::lXX}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword signextend //load halfword signextend
{343, &JitArm64::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {343, &JitArm64::lXX}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &JitArm64::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {375, &JitArm64::lXX}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte //load byte
{87, &JitArm64::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {87, &JitArm64::lXX}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &JitArm64::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {119, &JitArm64::lXX}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse //load byte reverse
{534, &JitArm64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {534, &JitArm64::lXX}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &JitArm64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {790, &JitArm64::lXX}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
// Conditional load/store (Wii SMP) // Conditional load/store (Wii SMP)
{150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, {150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}},
@ -236,16 +236,16 @@ static GekkoOPTemplate table31[] =
{597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, {597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
//store word //store word
{151, &JitArm64::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, {151, &JitArm64::stX}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, &JitArm64::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, {183, &JitArm64::stX}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store halfword //store halfword
{407, &JitArm64::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, {407, &JitArm64::stX}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, &JitArm64::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, {439, &JitArm64::stX}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store byte //store byte
{215, &JitArm64::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, {215, &JitArm64::stX}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, &JitArm64::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, {247, &JitArm64::stX}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store bytereverse //store bytereverse
{662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, {662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
@ -255,15 +255,15 @@ static GekkoOPTemplate table31[] =
{725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, {725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
// fp load/store // fp load/store
{535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, {535, &JitArm64::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, {567, &JitArm64::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, {599, &JitArm64::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, {631, &JitArm64::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, {663, &JitArm64::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, {695, &JitArm64::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, {727, &JitArm64::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, {759, &JitArm64::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, {983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, {19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
@ -272,9 +272,9 @@ static GekkoOPTemplate table31[] =
{146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{210, &JitArm64::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}}, {210, &JitArm64::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}},
{242, &JitArm64::mtsrin}, //"mtsrin", OPTYPE_SYSTEM, 0}}, {242, &JitArm64::mtsrin}, //"mtsrin", OPTYPE_SYSTEM, 0}},
{339, &JitArm64::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {339, &JitArm64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &JitArm64::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, {467, &JitArm64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &JitArm64::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, {371, &JitArm64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{595, &JitArm64::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {595, &JitArm64::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &JitArm64::mfsrin}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &JitArm64::mfsrin}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
@ -294,25 +294,25 @@ static GekkoOPTemplate table31[] =
static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table31_2[] =
{ {
{266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {266, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {778, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {10, &JitArm64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {522, &JitArm64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {202, &JitArm64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {235, &JitArm64::mullwx}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {747, &JitArm64::mullwx}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{104, &JitArm64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {104, &JitArm64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {40, &JitArm64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &JitArm64::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {552, &JitArm64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
@ -323,27 +323,27 @@ static GekkoOPTemplate table31_2[] =
static GekkoOPTemplate table59[] = static GekkoOPTemplate table59[] =
{ {
{18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, {18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, {20, &JitArm64::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, {21, &JitArm64::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
// {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
{24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, {24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, &JitArm64::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &JitArm64::fmsubsx}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &JitArm64::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, &JitArm64::fnmsubsx}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, {31, &JitArm64::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
}; };
static GekkoOPTemplate table63[] = static GekkoOPTemplate table63[] =
{ {
{264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {264, &JitArm64::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, {32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, {0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &JitArm64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {136, &JitArm64::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, {40, &JitArm64::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, {12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, {64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
@ -357,16 +357,16 @@ static GekkoOPTemplate table63[] =
static GekkoOPTemplate table63_2[] = static GekkoOPTemplate table63_2[] =
{ {
{18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, {18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
{20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {20, &JitArm64::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, {21, &JitArm64::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, &JitArm64::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, &JitArm64::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &JitArm64::fmsubx}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &JitArm64::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, &JitArm64::fnmsubx}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {31, &JitArm64::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
}; };