diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 3af53428ac..7cbf4509f6 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -225,8 +225,11 @@ elseif(_M_ARM_64) PowerPC/JitArm64/JitArm64_RegCache.cpp PowerPC/JitArm64/JitArm64_BackPatch.cpp PowerPC/JitArm64/JitArm64_Branch.cpp + PowerPC/JitArm64/JitArm64_FloatingPoint.cpp PowerPC/JitArm64/JitArm64_Integer.cpp PowerPC/JitArm64/JitArm64_LoadStore.cpp + PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp + PowerPC/JitArm64/JitArm64_Paired.cpp PowerPC/JitArm64/JitArm64_SystemRegisters.cpp PowerPC/JitArm64/JitArm64_Tables.cpp) endif() diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 6f1b087854..29cbb62e1d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -16,6 +16,7 @@ void JitArm64::Init() { AllocCodeSpace(CODE_SIZE); jo.enableBlocklink = true; + jo.optimizeGatherPipe = true; gpr.Init(this); fpr.Init(this); @@ -179,6 +180,14 @@ void JitArm64::WriteExitDestInR(ARM64Reg Reg) BR(EncodeRegTo64(Reg)); } +void JitArm64::DumpCode(const u8* start, const u8* end) +{ + std::string output = ""; + for (u8* code = (u8*)start; code < end; code += 4) + output += StringFromFormat("%08x", Common::swap32(*(u32*)code)); + WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str()); +} + void JitArm64::Run() { CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; @@ -281,6 +290,21 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB js.next_compilerPC = ops[i + 1].address; } + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + { + js.fifoBytesThisBlock -= 32; + + gpr.Lock(W30); + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + regs_in_use[W30] = 0; + + ABI_PushRegisters(regs_in_use); + MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe); + BLR(X30); + ABI_PopRegisters(regs_in_use); + gpr.Unlock(W30); + } 
+ if (!ops[i].skip) { if (js.memcheck && (opinfo->flags & FL_USE_FPU)) @@ -294,6 +318,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB // If we have a register that will never be used again, flush it. for (int j : ~ops[i].gprInUse) gpr.StoreRegister(j); + for (int j : ~ops[i].fprInUse) + fpr.StoreRegister(j); if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) { diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index b0c4689bff..94e9e945eb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -21,11 +21,13 @@ // Some asserts to make sure we will be able to load everything static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); +static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!"); +static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock { public: - JitArm64() : code_buffer(32000) {} + JitArm64() : code_buffer(32000), m_float_emit(this) {} ~JitArm64() {} void Init(); @@ -80,6 +82,7 @@ public: // Integer void arith_imm(UGeckoInstruction inst); void boolX(UGeckoInstruction inst); + void addx(UGeckoInstruction inst); void extsXx(UGeckoInstruction inst); void cntlzwx(UGeckoInstruction inst); void negx(UGeckoInstruction inst); @@ -87,6 +90,14 @@ public: void cmpl(UGeckoInstruction inst); void cmpi(UGeckoInstruction inst); void cmpli(UGeckoInstruction inst); + void rlwinmx(UGeckoInstruction inst); + void srawix(UGeckoInstruction inst); + void mullwx(UGeckoInstruction inst); + void addic(UGeckoInstruction inst); + void mulli(UGeckoInstruction inst); + void addzex(UGeckoInstruction inst); + void subfx(UGeckoInstruction inst); + void addcx(UGeckoInstruction inst); // System Registers void mtmsr(UGeckoInstruction 
inst); @@ -97,12 +108,66 @@ public: void mfsrin(UGeckoInstruction inst); void mtsrin(UGeckoInstruction inst); void twx(UGeckoInstruction inst); + void mfspr(UGeckoInstruction inst); + void mftb(UGeckoInstruction inst); + void mtspr(UGeckoInstruction inst); // LoadStore void icbi(UGeckoInstruction inst); void lXX(UGeckoInstruction inst); void stX(UGeckoInstruction inst); + // LoadStore floating point + void lfXX(UGeckoInstruction inst); + void stfXX(UGeckoInstruction inst); + + // Floating point + void fabsx(UGeckoInstruction inst); + void faddsx(UGeckoInstruction inst); + void faddx(UGeckoInstruction inst); + void fmaddsx(UGeckoInstruction inst); + void fmaddx(UGeckoInstruction inst); + void fmrx(UGeckoInstruction inst); + void fmsubsx(UGeckoInstruction inst); + void fmsubx(UGeckoInstruction inst); + void fmulsx(UGeckoInstruction inst); + void fmulx(UGeckoInstruction inst); + void fnabsx(UGeckoInstruction inst); + void fnegx(UGeckoInstruction inst); + void fnmaddsx(UGeckoInstruction inst); + void fnmaddx(UGeckoInstruction inst); + void fnmsubsx(UGeckoInstruction inst); + void fnmsubx(UGeckoInstruction inst); + void fselx(UGeckoInstruction inst); + void fsubsx(UGeckoInstruction inst); + void fsubx(UGeckoInstruction inst); + + // Paired + void ps_abs(UGeckoInstruction inst); + void ps_add(UGeckoInstruction inst); + void ps_div(UGeckoInstruction inst); + void ps_madd(UGeckoInstruction inst); + void ps_madds0(UGeckoInstruction inst); + void ps_madds1(UGeckoInstruction inst); + void ps_merge00(UGeckoInstruction inst); + void ps_merge01(UGeckoInstruction inst); + void ps_merge10(UGeckoInstruction inst); + void ps_merge11(UGeckoInstruction inst); + void ps_mr(UGeckoInstruction inst); + void ps_msub(UGeckoInstruction inst); + void ps_mul(UGeckoInstruction inst); + void ps_muls0(UGeckoInstruction inst); + void ps_muls1(UGeckoInstruction inst); + void ps_nabs(UGeckoInstruction inst); + void ps_nmadd(UGeckoInstruction inst); + void ps_nmsub(UGeckoInstruction inst); + void 
ps_neg(UGeckoInstruction inst); + void ps_res(UGeckoInstruction inst); + void ps_sel(UGeckoInstruction inst); + void ps_sub(UGeckoInstruction inst); + void ps_sum0(UGeckoInstruction inst); + void ps_sum1(UGeckoInstruction inst); + private: Arm64GPRCache gpr; Arm64FPRCache fpr; @@ -112,6 +177,11 @@ private: PPCAnalyst::CodeBuffer code_buffer; + ARM64FloatEmitter m_float_emit; + + // Dump a memory range of code + void DumpCode(const u8* start, const u8* end); + // The key is the backpatch flags std::map m_backpatch_info; @@ -137,6 +207,8 @@ private: void ComputeRC(Arm64Gen::ARM64Reg reg, int crf = 0); void ComputeRC(u32 imm, int crf = 0); + void ComputeCarry(bool Carry); + void ComputeCarry(); typedef u32 (*Operation)(u32, u32); void reg_imm(u32 d, u32 a, bool binary, u32 value, Operation do_op, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, ArithOption), bool Rc = false); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 0b813e4b72..ff185214ad 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -29,7 +29,8 @@ static void DoBacktrace(uintptr_t access_address, SContext* ctx) for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16) { pc_memory += StringFromFormat("%08x%08x%08x%08x", - *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); + Common::swap32(*(u32*)pc), Common::swap32(*(u32*)(pc + 4)), + Common::swap32(*(u32*)(pc + 8)), Common::swap32(*(u32*)(pc + 12))); ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x", pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); @@ -51,10 +52,34 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg) *flags |= BackPatchInfo::FLAG_SIZE_8; else if (size == 1) // 16-bit *flags |= BackPatchInfo::FLAG_SIZE_16; - else // 32-bit + else if (size == 2) // 32-bit *flags |= 
BackPatchInfo::FLAG_SIZE_32; + else if (size == 3) // 64-bit + *flags |= BackPatchInfo::FLAG_SIZE_F64; - if (op == 0xE5) // Load + if (op == 0xF5) // NEON LDR + { + if (size == 2) // 32-bit float + { + *flags &= ~BackPatchInfo::FLAG_SIZE_32; + *flags |= BackPatchInfo::FLAG_SIZE_F32; + } + *flags |= BackPatchInfo::FLAG_LOAD; + *reg = (ARM64Reg)(inst & 0x1F); + return true; + } + else if (op == 0xF4) // NEON STR + { + if (size == 2) // 32-bit float + { + *flags &= ~BackPatchInfo::FLAG_SIZE_32; + *flags |= BackPatchInfo::FLAG_SIZE_F32; + } + *flags |= BackPatchInfo::FLAG_STORE; + *reg = (ARM64Reg)(inst & 0x1F); + return true; + } + else if (op == 0xE5) // Load { *flags |= BackPatchInfo::FLAG_LOAD; *reg = (ARM64Reg)(inst & 0x1F); @@ -90,10 +115,38 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, if (flags & BackPatchInfo::FLAG_STORE && flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) { + ARM64FloatEmitter float_emit(emit); + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + float_emit.FCVT(32, 64, Q0, RS); + float_emit.REV32(8, D0, D0); + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0); + } + else + { + float_emit.REV64(8, Q0, RS); + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0); + } } else if (flags & BackPatchInfo::FLAG_LOAD && flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) { + ARM64FloatEmitter float_emit(emit); + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + float_emit.LD1R(32, RS, addr); + float_emit.REV64(8, RS, RS); + float_emit.FCVTL(64, RS, RS); + } + else + { + float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0); + float_emit.REV64(8, Q0, Q0); + float_emit.INS(64, RS, 0, Q0, 0); + } } else if (flags & BackPatchInfo::FLAG_STORE) { @@ -143,10 +196,39 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 
flags, bool fastmem, if (flags & BackPatchInfo::FLAG_STORE && flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) { + ARM64FloatEmitter float_emit(emit); + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + float_emit.FCVT(32, 64, Q0, RS); + float_emit.FMOV(32, false, W0, Q0); + emit->MOVI2R(X30, (u64)&Memory::Write_U32); + emit->BLR(X30); + } + else + { + emit->MOVI2R(X30, (u64)&Memory::Write_F64); + float_emit.DUP(64, Q0, RS); + emit->BLR(X30); + } + } else if (flags & BackPatchInfo::FLAG_LOAD && flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) { + ARM64FloatEmitter float_emit(emit); + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + emit->MOVI2R(X30, (u64)&Memory::Read_U32); + emit->BLR(X30); + float_emit.DUP(32, RS, X0); + float_emit.FCVTL(64, RS, RS); + } + else + { + emit->MOVI2R(X30, (u64)&Memory::Read_F64); + emit->BLR(X30); + float_emit.INS(64, RS, 0, X0); + } } else if (flags & BackPatchInfo::FLAG_STORE) { @@ -245,7 +327,8 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) ctx->CTX_PC = new_pc; // Wipe the top bits of the addr_register - if (flags & BackPatchInfo::FLAG_STORE) + if (flags & BackPatchInfo::FLAG_STORE && + !(flags & BackPatchInfo::FLAG_SIZE_F64)) ctx->CTX_REG(1) &= 0xFFFFFFFFUll; else ctx->CTX_REG(0) &= 0xFFFFFFFFUll; @@ -382,6 +465,46 @@ void JitArm64::InitBackpatch() SetCodePtr(code_base); + m_backpatch_info[flags] = info; + } + // 32bit float + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_F32; + EmitBackpatchRoutine(this, flags, false, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 64bit float + { + flags = + 
BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_F64; + EmitBackpatchRoutine(this, flags, false, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + m_backpatch_info[flags] = info; } } @@ -446,6 +569,46 @@ void JitArm64::InitBackpatch() SetCodePtr(code_base); + m_backpatch_info[flags] = info; + } + // 32bit float + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_F32; + EmitBackpatchRoutine(this, flags, false, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 64bit float + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_F64; + EmitBackpatchRoutine(this, flags, false, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, Q0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + m_backpatch_info[flags] = info; } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp new file mode 100644 index 0000000000..a670edccc8 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -0,0 +1,376 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to 
the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" +#include "Common/StringUtil.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::fabsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FB); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FABS(64, V0, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::faddsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + + m_float_emit.FADD(64, VD, VA, VB); + m_float_emit.INS(64, VD, 1, VD, 0); +} + +void JitArm64::faddx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FADD(64, V0, VA, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::fmaddsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + 
m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, V0, V0, VB); + m_float_emit.DUP(64, VD, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fmaddx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, V0, V0, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fmrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FB); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + + m_float_emit.INS(64, VD, 0, VB, 0); +} + +void JitArm64::fmsubsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, V0, V0, VB); + m_float_emit.DUP(64, VD, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fmsubx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, V0, V0, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + fpr.Unlock(V0); +} + +void 
JitArm64::fmulsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VC = fpr.R(inst.FC); + ARM64Reg VD = fpr.R(inst.FD); + + m_float_emit.FMUL(64, VD, VA, VC); + m_float_emit.INS(64, VD, 1, VD, 0); +} + +void JitArm64::fmulx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VC = fpr.R(inst.FC); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::fnabsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FB); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FABS(64, V0, VB); + m_float_emit.FNEG(64, V0, V0); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::fnegx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FB); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FNEG(64, V0, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::fnmaddsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, 
V0, VA, VC); + m_float_emit.FADD(64, V0, V0, VB); + m_float_emit.FNEG(64, V0, V0); + m_float_emit.DUP(64, VD, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fnmaddx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, V0, V0, VB); + m_float_emit.FNEG(64, V0, V0); + m_float_emit.INS(64, VD, 0, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fnmsubsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, V0, V0, VB); + m_float_emit.FNEG(64, V0, V0); + m_float_emit.DUP(64, VD, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fnmsubx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, V0, V0, VB); + m_float_emit.FNEG(64, V0, V0); + m_float_emit.INS(64, VD, 0, V0, 0); + fpr.Unlock(V0); +} + +void JitArm64::fselx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + fpr.BindToRegister(inst.FD, + inst.FD == inst.FA || + inst.FD == inst.FB || + 
inst.FD == inst.FC); + + ARM64Reg V0 = fpr.GetReg(); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VC = gpr.R(inst.FC); + + m_float_emit.FCMPE(VA); + m_float_emit.FCSEL(V0, VC, VB, CC_GE); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} + +void JitArm64::fsubsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + + m_float_emit.FSUB(64, VD, VA, VB); + m_float_emit.INS(64, VD, 1, VD, 0); +} + +void JitArm64::fsubx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(inst.Rc); + + fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); + ARM64Reg VA = fpr.R(inst.FA); + ARM64Reg VB = fpr.R(inst.FB); + ARM64Reg VD = fpr.R(inst.FD); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FSUB(64, V0, VA, VB); + m_float_emit.INS(64, VD, 0, V0, 0); + + fpr.Unlock(V0); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a1a7ffa005..14da56ec7c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -39,6 +39,28 @@ void JitArm64::ComputeRC(u32 imm, int crf) gpr.Unlock(WA); } +void JitArm64::ComputeCarry(bool Carry) +{ + if (Carry) + { + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, 1); + STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + gpr.Unlock(WA); + return; + } + + STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca)); +} + +void JitArm64::ComputeCarry() +{ + ARM64Reg WA = gpr.GetReg(); + CSINC(WA, WSP, WSP, CC_CC); + STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + gpr.Unlock(WA); +} + // Following static functions are used in conjunction with reg_imm static u32 
Add(u32 a, u32 b) { @@ -245,6 +267,29 @@ void JitArm64::boolX(UGeckoInstruction inst) } } +void JitArm64::addx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); + gpr.SetImmediate(d, i + j); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + ADD(gpr.R(d), gpr.R(a), gpr.R(b)); + if (inst.Rc) + ComputeRC(gpr.R(d), 0); + } +} + void JitArm64::extsXx(UGeckoInstruction inst) { INSTRUCTION_START @@ -415,3 +460,237 @@ void JitArm64::cmpli(UGeckoInstruction inst) FALLBACK_IF(true); } +void JitArm64::rlwinmx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + + u32 mask = Helper_Mask(inst.MB,inst.ME); + if (gpr.IsImm(inst.RS)) + { + gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask); + if (inst.Rc) + ComputeRC(gpr.GetImm(inst.RA), 0); + return; + } + + gpr.BindToRegister(inst.RA, inst.RA == inst.RS); + + ARM64Reg WA = gpr.GetReg(); + ArithOption Shift(gpr.R(inst.RS), ST_ROR, 32 - inst.SH); + MOVI2R(WA, mask); + AND(gpr.R(inst.RA), WA, gpr.R(inst.RS), Shift); + gpr.Unlock(WA); + + if (inst.Rc) + ComputeRC(gpr.R(inst.RA), 0); +} + +void JitArm64::srawix(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + + int a = inst.RA; + int s = inst.RS; + int amount = inst.SH; + + if (gpr.IsImm(s)) + { + s32 imm = (s32)gpr.GetImm(s); + gpr.SetImmediate(a, imm >> amount); + + if (amount != 0 && (imm < 0) && (imm << (32 - amount))) + ComputeCarry(true); + else + ComputeCarry(false); + } + else if (amount != 0) + { + gpr.BindToRegister(a, a == s); + ARM64Reg RA = gpr.R(a); + ARM64Reg RS = gpr.R(s); + ARM64Reg WA = gpr.GetReg(); + + ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount)); + ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount)); + if (inst.Rc) + ComputeRC(RA, 0); + + ANDS(WSP, WA, RA, ArithOption(RA, 
ST_LSL, 0)); + CSINC(WA, WSP, WSP, CC_EQ); + STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + gpr.Unlock(WA); + } + else + { + gpr.BindToRegister(a, a == s); + ARM64Reg RA = gpr.R(a); + ARM64Reg RS = gpr.R(s); + MOV(RA, RS); + STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca)); + } +} + +void JitArm64::addic(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + + int a = inst.RA, d = inst.RD; + bool rc = inst.OPCD == 13; + s32 simm = inst.SIMM_16; + u32 imm = (u32)simm; + + if (gpr.IsImm(a)) + { + + u32 i = gpr.GetImm(a); + gpr.SetImmediate(d, i + imm); + + bool has_carry = Interpreter::Helper_Carry(i, imm); + ComputeCarry(has_carry); + if (rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + gpr.BindToRegister(d, d == a); + if (imm < 4096) + { + ADDS(gpr.R(d), gpr.R(a), imm); + } + else if (simm > -4096 && simm < 0) + { + SUBS(gpr.R(d), gpr.R(a), std::abs(simm)); + } + else + { + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, imm); + ADDS(gpr.R(d), gpr.R(a), WA); + gpr.Unlock(WA); + } + + ComputeCarry(); + if (rc) + ComputeRC(gpr.R(d), 0); + } +} + +void JitArm64::mulli(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, d = inst.RD; + + if (gpr.IsImm(a)) + { + s32 i = (s32)gpr.GetImm(a); + gpr.SetImmediate(d, i * inst.SIMM_16); + } + else + { + gpr.BindToRegister(d, d == a); + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, (u32)(s32)inst.SIMM_16); + MUL(gpr.R(d), gpr.R(a), WA); + gpr.Unlock(WA); + } +} + +void JitArm64::mullwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); + gpr.SetImmediate(d, i * j); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + gpr.BindToRegister(d, d == a || d == b); + MUL(gpr.R(d), gpr.R(a), gpr.R(b)); + if (inst.Rc) + ComputeRC(gpr.R(d), 0); 
+ } +} + +void JitArm64::addzex(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, d = inst.RD; + + gpr.BindToRegister(d, d == a); + ARM64Reg WA = gpr.GetReg(); + LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + CMP(WA, 1); + CSINC(gpr.R(d), gpr.R(a), gpr.R(a), CC_NEQ); + CMP(gpr.R(d), 0); + gpr.Unlock(WA); + ComputeCarry(); +} + +void JitArm64::subfx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + u32 i = gpr.GetImm(a), j = gpr.GetImm(b); + gpr.SetImmediate(d, j - i); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + SUB(gpr.R(d), gpr.R(b), gpr.R(a)); + if (inst.Rc) + ComputeRC(gpr.R(d), 0); + } +} + +void JitArm64::addcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + FALLBACK_IF(inst.OE); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + u32 i = gpr.GetImm(a), j = gpr.GetImm(b); + gpr.SetImmediate(d, i * j); + + bool has_carry = Interpreter::Helper_Carry(i, j); + ComputeCarry(has_carry); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + gpr.BindToRegister(d, d == a || d == b); + ADDS(gpr.R(d), gpr.R(a), gpr.R(b)); + + ComputeCarry(); + if (inst.Rc) + ComputeRC(gpr.R(d), 0); + } +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 58c1523897..c0a3176307 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -41,6 +41,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o off_reg = gpr.R(offsetReg); BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 ignore_mask(0); regs_in_use[W0] = 0; 
regs_in_use[W30] = 0; @@ -114,25 +115,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (is_immediate) MOVI2R(XA, imm_addr); + if (update) + MOV(gpr.R(addr), addr_reg); + if (is_immediate && Memory::IsRAMAddress(imm_addr)) { EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA); - - if (update) - MOVI2R(up_reg, imm_addr); } else { - if (update) - MOV(up_reg, addr_reg); - // Has a chance of being backpatched which will destroy our state // push and pop everything in this instance ABI_PushRegisters(regs_in_use); + m_float_emit.ABI_PushRegisters(fprs_in_use); EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, dest_reg, XA); + m_float_emit.ABI_PopRegisters(fprs_in_use); ABI_PopRegisters(regs_in_use, ignore_mask); } @@ -155,6 +155,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s reg_dest = gpr.R(dest); BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); regs_in_use[W0] = 0; regs_in_use[W1] = 0; regs_in_use[W30] = 0; @@ -237,10 +238,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s // Has a chance of being backpatched which will destroy our state // push and pop everything in this instance ABI_PushRegisters(regs_in_use); + m_float_emit.ABI_PushRegisters(fprs_in_use); EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, RS, XA); + m_float_emit.ABI_PopRegisters(fprs_in_use); ABI_PopRegisters(regs_in_use); } @@ -321,8 +324,6 @@ void JitArm64::lXX(UGeckoInstruction inst) break; } - FALLBACK_IF(update); - SafeLoadToReg(d, update ? a : (a ? 
a : -1), offsetReg, flags, offset, update);
 
 	// LWZ idle skipping
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
new file mode 100644
index 0000000000..49c40a905a
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -0,0 +1,401 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "Common/Arm64Emitter.h"
+#include "Common/Common.h"
+
+#include "Core/Core.h"
+#include "Core/CoreTiming.h"
+#include "Core/PowerPC/PowerPC.h"
+#include "Core/PowerPC/PPCTables.h"
+#include "Core/PowerPC/JitArm64/Jit.h"
+#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
+#include "Core/PowerPC/JitArm64/JitAsm.h"
+
+using namespace Arm64Gen;
+
+// Emits code for the floating-point load family (lfs, lfsu, lfd, lfdu and the
+// indexed lfsx, lfsux, lfdx, lfdux forms). The effective address is built in W0
+// and, for update forms, written back to rA before the access is emitted.
+void JitArm64::lfXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff);
+
+	u32 a = inst.RA, b = inst.RB;
+
+	s32 offset = inst.SIMM_16;
+	u32 flags = BackPatchInfo::FLAG_LOAD;
+	bool update = false;
+	s32 offset_reg = -1;
+
+	switch (inst.OPCD)
+	{
+		case 31:
+			switch (inst.SUBOP10)
+			{
+				case 567: // lfsux
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					update = true;
+					offset_reg = b;
+				break;
+				case 535: // lfsx
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					offset_reg = b;
+				break;
+				case 631: // lfdux
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					update = true;
+					offset_reg = b;
+				break;
+				case 599: // lfdx
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					offset_reg = b;
+				break;
+			}
+		break;
+		case 49: // lfsu
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+			update = true;
+		break;
+		case 48: // lfs
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+		break;
+		case 51: // lfdu
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+			update = true;
+		break;
+		case 50: // lfd
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+		break;
+	}
+
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg addr_reg = W0;
+
+	gpr.Lock(W0, W30);
+	fpr.Lock(Q0);
+
+	if (update)
+	{
+		// Always uses RA
+		if (gpr.IsImm(a) && offset_reg == -1)
+		{
+			is_immediate = true;
+			imm_addr = offset + gpr.GetImm(a);
+		}
+		else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
+		{
+			is_immediate = true;
+			imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+		}
+		else
+		{
+			if (offset_reg == -1)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
+			}
+		}
+	}
+	else
+	{
+		if (offset_reg == -1)
+		{
+			if (a && gpr.IsImm(a))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + offset;
+			}
+			else if (a)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				is_immediate = true;
+				imm_addr = offset;
+			}
+		}
+		else
+		{
+			if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+			}
+			else if (!a && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(offset_reg);
+			}
+			else if (a)
+			{
+				ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
+			}
+			else
+			{
+				MOV(addr_reg, gpr.R(offset_reg));
+			}
+		}
+	}
+
+	ARM64Reg XA = EncodeRegTo64(addr_reg);
+
+	if (is_immediate)
+		MOVI2R(XA, imm_addr);
+
+	if (update)
+		MOV(gpr.R(a), addr_reg);
+
+	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+	BitSet32 fpr_ignore_mask(0);
+	regs_in_use[W0] = 0;
+	regs_in_use[W30] = 0;
+	fprs_in_use[0] = 0; // Q0
+	fpr_ignore_mask[VD - Q0] = 1; // the destination must keep the loaded value across the pop
+
+	if (is_immediate && Memory::IsRAMAddress(imm_addr))
+	{
+		EmitBackpatchRoutine(this, flags, true, false, VD, XA);
+	}
+	else
+	{
+		// Has a chance of being backpatched which will destroy our state
+		// push and pop everything in this instance
+		ABI_PushRegisters(regs_in_use);
+		m_float_emit.ABI_PushRegisters(fprs_in_use);
+		EmitBackpatchRoutine(this, flags,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			VD, XA);
+		m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask);
+		ABI_PopRegisters(regs_in_use);
+	}
+
+	gpr.Unlock(W0, W30);
+	fpr.Unlock(Q0);
+}
+
+// Emits code for the floating-point store family (stfs, stfsu, stfd, stfdu and
+// the indexed stfsx, stfsux, stfdx, stfdux forms). Update forms write the
+// effective address back to rA before the store is emitted.
+void JitArm64::stfXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff);
+
+	u32 a = inst.RA, b = inst.RB;
+
+	s32 offset = inst.SIMM_16;
+	u32 flags = BackPatchInfo::FLAG_STORE;
+	bool update = false;
+	s32 offset_reg = -1;
+
+	switch (inst.OPCD)
+	{
+		case 31:
+			switch (inst.SUBOP10)
+			{
+				case 663: // stfsx
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					offset_reg = b;
+				break;
+				case 695: // stfsux
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					update = true;
+					offset_reg = b;
+				break;
+				case 727: // stfdx
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					offset_reg = b;
+				break;
+				case 759: // stfdux
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					update = true;
+					offset_reg = b;
+				break;
+			}
+		break;
+		case 53: // stfsu
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+			update = true;
+		break;
+		case 52: // stfs
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+		break;
+		case 55: // stfdu
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+			update = true;
+		break;
+		case 54: // stfd
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+		break;
+	}
+
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	ARM64Reg V0 = fpr.R(inst.FS);
+	ARM64Reg addr_reg;
+	if (flags & BackPatchInfo::FLAG_SIZE_F64)
+		addr_reg = W0;
+	else
+		addr_reg = W1;
+
+	gpr.Lock(W0, W1, W30);
+	fpr.Lock(Q0);
+
+	if (update)
+	{
+		// Always uses RA
+		if (gpr.IsImm(a) && offset_reg == -1)
+		{
+			is_immediate = true;
+			imm_addr = offset + gpr.GetImm(a);
+		}
+		else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
+		{
+			is_immediate = true;
+			imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+		}
+		else
+		{
+			if (offset_reg == -1)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
+			}
+		}
+	}
+	else
+	{
+		if (offset_reg == -1)
+ { + if (a && gpr.IsImm(a)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + offset; + } + else if (a) + { + MOVI2R(addr_reg, offset); + ADD(addr_reg, addr_reg, gpr.R(a)); + } + else + { + is_immediate = true; + imm_addr = offset; + } + } + else + { + if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg); + } + else if (!a && gpr.IsImm(offset_reg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(offset_reg); + } + else if (a) + { + ADD(addr_reg, gpr.R(a), gpr.R(offset_reg)); + } + else + { + MOV(addr_reg, gpr.R(offset_reg)); + } + } + } + + ARM64Reg XA = EncodeRegTo64(addr_reg); + + if (is_immediate) + MOVI2R(XA, imm_addr); + + if (update) + MOV(gpr.R(a), addr_reg); + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + regs_in_use[W0] = 0; + regs_in_use[W1] = 0; + regs_in_use[W30] = 0; + fprs_in_use[0] = 0; // Q0 + + if (is_immediate) + { + if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe) + { + int accessSize; + if (flags & BackPatchInfo::FLAG_SIZE_F64) + accessSize = 64; + else + accessSize = 32; + + MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount); + MOVI2R(X1, (u64)GPFifo::m_gatherPipe); + LDR(INDEX_UNSIGNED, W0, X30, 0); + ADD(X1, X1, X0); + if (accessSize == 64) + { + m_float_emit.REV64(8, Q0, V0); + m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0); + } + else if (accessSize == 32) + { + m_float_emit.FCVT(32, 64, Q0, V0); + m_float_emit.REV32(8, D0, D0); + m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0); + } + ADD(W0, W0, accessSize >> 3); + STR(INDEX_UNSIGNED, W0, X30, 0); + jit->js.fifoBytesThisBlock += accessSize >> 3; + + } + else if (Memory::IsRAMAddress(imm_addr)) + { + EmitBackpatchRoutine(this, flags, true, false, V0, XA); + } + else + { + ABI_PushRegisters(regs_in_use); + m_float_emit.ABI_PushRegisters(fprs_in_use); + EmitBackpatchRoutine(this, flags, false, false, V0, XA); + 
m_float_emit.ABI_PopRegisters(fprs_in_use); + ABI_PopRegisters(regs_in_use); + } + } + else + { + // Has a chance of being backpatched which will destroy our state + // push and pop everything in this instance + ABI_PushRegisters(regs_in_use); + m_float_emit.ABI_PushRegisters(fprs_in_use); + EmitBackpatchRoutine(this, flags, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + V0, XA); + m_float_emit.ABI_PopRegisters(fprs_in_use); + ABI_PopRegisters(regs_in_use); + } + gpr.Unlock(W0, W1, W30); + fpr.Unlock(Q0); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp new file mode 100644 index 0000000000..e90ee63c64 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -0,0 +1,495 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" +#include "Common/StringUtil.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::ps_abs(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == b); + + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FABS(64, VD, VB); +} + +void JitArm64::ps_add(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FADD(64, VD, VA, VB); +} + +void 
JitArm64::ps_div(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FDIV(64, VD, VA, VB); +} + +void JitArm64::ps_madd(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + + fpr.Unlock(V0); +} + +void JitArm64::ps_madds0(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, 0); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + + fpr.Unlock(V0); +} + +void JitArm64::ps_madds1(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + + fpr.Unlock(V0); +} + +void JitArm64::ps_merge00(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = 
inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.TRN1(64, VD, VA, VB); +} + +void JitArm64::ps_merge01(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.INS(64, VD, 0, VA, 0); + m_float_emit.INS(64, VD, 1, VB, 1); +} + +void JitArm64::ps_merge10(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + if (d != a && d != b) + { + m_float_emit.INS(64, VD, 0, VA, 1); + m_float_emit.INS(64, VD, 1, VB, 0); + } + else + { + ARM64Reg V0 = fpr.GetReg(); + m_float_emit.INS(64, V0, 0, VA, 1); + m_float_emit.INS(64, V0, 1, VB, 0); + m_float_emit.ORR(VD, V0, V0); + fpr.Unlock(V0); + } +} + +void JitArm64::ps_merge11(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.TRN2(64, VD, VA, VB); +} + +void JitArm64::ps_mr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 b = inst.FB, d = inst.FD; + + if (d == b) + return; + + fpr.BindToRegister(d, false); + + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.ORR(VD, VB, VB); +} + +void JitArm64::ps_mul(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == 
c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FMUL(64, VD, VA, VC); +} + +void JitArm64::ps_muls0(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, 0); + m_float_emit.FMUL(64, VD, VA, V0); + fpr.Unlock(V0); +} + +void JitArm64::ps_muls1(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.FMUL(64, VD, VA, V0); + fpr.Unlock(V0); +} + +void JitArm64::ps_msub(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, VD, V0, VB); + + fpr.Unlock(V0); +} + +void JitArm64::ps_nabs(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == b); + + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FABS(64, VD, VB); + m_float_emit.FNEG(64, VD, VD); +} + +void JitArm64::ps_neg(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == b); + + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = 
fpr.R(d);
+
+	m_float_emit.FNEG(64, VD, VB);
+}
+
+void JitArm64::ps_nmadd(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch for the product; released below
+
+	m_float_emit.FMUL(64, V0, VA, VC);
+	m_float_emit.FADD(64, VD, V0, VB);
+	m_float_emit.FNEG(64, VD, VD); // d = -(a * c + b)
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::ps_nmsub(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+	ARM64Reg V0 = fpr.GetReg(); // scratch for the product; released below
+
+	m_float_emit.FMUL(64, V0, VA, VC);
+	m_float_emit.FSUB(64, VD, V0, VB);
+	m_float_emit.FNEG(64, VD, VD); // d = -(a * c - b)
+
+	fpr.Unlock(V0);
+}
+
+void JitArm64::ps_res(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 b = inst.FB, d = inst.FD;
+	fpr.BindToRegister(d, d == b);
+
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VD = fpr.R(d);
+
+	m_float_emit.FRECPE(64, VD, VB); // ps_res is the reciprocal estimate; FRSQRTE would give 1/sqrt(b)
+}
+
+void JitArm64::ps_sel(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	fpr.BindToRegister(d, d == a || d == b || d == c);
+
+	ARM64Reg VA = fpr.R(a);
+	ARM64Reg VB = fpr.R(b);
+	ARM64Reg VC = fpr.R(c);
+	ARM64Reg VD = fpr.R(d);
+
+	if (d != a && d != b && d != c)
+	{
+		m_float_emit.FCMGE(64, VD, VA);
+		m_float_emit.BSL(VD, VC, VB);
+	}
+	else
+	{
+		ARM64Reg V0 = fpr.GetReg(); // d aliases a source, so build the result in a scratch register
+		m_float_emit.FCMGE(64, V0, VA);
+		m_float_emit.BSL(V0, VC, VB);
+		m_float_emit.ORR(VD, V0, V0);
+		fpr.Unlock(V0);
+	}
+}
+
+void 
JitArm64::ps_sub(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d); + + m_float_emit.FSUB(64, VD, VA, VB); +} + +void JitArm64::ps_sum0(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VB, 1); + if (d != c) + { + m_float_emit.FADD(64, VD, V0, VA); + m_float_emit.INS(64, VD, 1, VC, 1); + } + else + { + m_float_emit.FADD(64, V0, V0, VA); + m_float_emit.INS(64, VD, 0, V0, 0); + } + + fpr.Unlock(V0); +} + +void JitArm64::ps_sum1(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + fpr.BindToRegister(d, d == a || d == b || d == c); + + ARM64Reg VA = fpr.R(a); + ARM64Reg VB = fpr.R(b); + ARM64Reg VC = fpr.R(c); + ARM64Reg VD = fpr.R(d); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VA, 0); + if (d != c) + { + m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.INS(64, VD, 0, VC, 0); + } + else + { + m_float_emit.FADD(64, V0, V0, VB); + m_float_emit.INS(64, VD, 1, V0, 1); + } + + fpr.Unlock(V0); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index fc7fc952d6..0abcb29abb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -10,6 +10,7 @@ using namespace Arm64Gen; void Arm64RegCache::Init(ARM64XEmitter *emitter) { m_emit = emitter; + m_float_emit.reset(new 
ARM64FloatEmitter(m_emit)); GetAllocationOrder(); } @@ -56,6 +57,23 @@ void Arm64RegCache::UnlockRegister(ARM64Reg host_reg) reg->Unlock(); } +void Arm64RegCache::FlushMostStaleRegister() +{ + u32 most_stale_preg = 0; + u32 most_stale_amount = 0; + for (u32 i = 0; i < 32; ++i) + { + u32 last_used = m_guest_registers[i].GetLastUsed(); + if (last_used > most_stale_amount && + m_guest_registers[i].GetType() == REG_REG) + { + most_stale_preg = i; + most_stale_amount = last_used; + } + } + FlushRegister(most_stale_preg, false); +} + // GPR Cache void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats) { @@ -212,23 +230,6 @@ void Arm64GPRCache::GetAllocationOrder() m_host_registers.push_back(HostReg(reg)); } -void Arm64GPRCache::FlushMostStaleRegister() -{ - u32 most_stale_preg = 0; - u32 most_stale_amount = 0; - for (u32 i = 0; i < 32; ++i) - { - u32 last_used = m_guest_registers[i].GetLastUsed(); - if (last_used > most_stale_amount && - m_guest_registers[i].GetType() == REG_REG) - { - most_stale_preg = i; - most_stale_amount = last_used; - } - } - FlushRegister(most_stale_preg, false); -} - BitSet32 Arm64GPRCache::GetCallerSavedUsed() { BitSet32 registers(0); @@ -254,35 +255,120 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg) // FPR Cache void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { - // XXX: Flush our stuff + for (int i = 0; i < 32; ++i) + { + bool flush = true; + if (mode == FLUSH_INTERPRETER) + { + if (!(op->regsOut[i] || op->regsIn[i])) + { + // This interpreted instruction doesn't use this register + flush = false; + } + } + + if (m_guest_registers[i].GetType() == REG_REG) + { + // Has to be flushed if it isn't in a callee saved register + ARM64Reg host_reg = m_guest_registers[i].GetReg(); + if (flush || !IsCalleeSaved(host_reg)) + FlushRegister(i, mode == FLUSH_MAINTAIN_STATE); + } + } } ARM64Reg Arm64FPRCache::R(u32 preg) { - // XXX: return a host reg holding a guest register + OpArg& reg = m_guest_registers[preg]; + 
IncrementAllUsed(); + reg.ResetLastUsed(); + + switch (reg.GetType()) + { + case REG_REG: // already in a reg + return reg.GetReg(); + break; + case REG_NOTLOADED: // Register isn't loaded at /all/ + { + ARM64Reg host_reg = GetReg(); + reg.LoadToReg(host_reg); + m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + return host_reg; + } + break; + default: + _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!"); + break; + } + // We've got an issue if we end up here + return INVALID_REG; +} + +void Arm64FPRCache::BindToRegister(u32 preg, bool do_load) +{ + OpArg& reg = m_guest_registers[preg]; + + if (reg.GetType() == REG_NOTLOADED) + { + ARM64Reg host_reg = GetReg(); + reg.LoadToReg(host_reg); + if (do_load) + m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + } } void Arm64FPRCache::GetAllocationOrder() { const std::vector allocation_order = { - D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - D11, D12, D13, D14, D15, D16, D17, D18, D19, - D20, D21, D22, D23, D24, D25, D26, D27, D28, - D29, D30, D31, + // Callee saved + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + + // Caller saved + Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23, + Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31, + Q7, Q6, Q5, Q4, Q3, Q2, Q1, Q0 }; for (ARM64Reg reg : allocation_order) m_host_registers.push_back(HostReg(reg)); } -void Arm64FPRCache::FlushMostStaleRegister() -{ - // XXX: Flush a register -} - void Arm64FPRCache::FlushByHost(ARM64Reg host_reg) { // XXX: Scan guest registers and flush if found } +bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg) +{ + static std::vector callee_regs = + { + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, INVALID_REG, + }; + return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end(); +} + +void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) +{ + OpArg& reg = m_guest_registers[preg]; + if (reg.GetType() == REG_REG) + { + ARM64Reg host_reg = reg.GetReg(); + + 
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
+		if (!maintain_state)
+		{
+			UnlockRegister(host_reg);
+			reg.Flush();
+		}
+	}
+}
+
+BitSet32 Arm64FPRCache::GetCallerSavedUsed()
+{
+	BitSet32 registers(0);
+	for (auto& it : m_host_registers)
+		if (it.IsLocked())
+			registers[it.GetReg() - Q0] = 1; // bit n stands for Qn, matching how callers index the set
+	return registers;
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index d032a16f82..115fad0720 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -119,7 +119,7 @@ private:
 class Arm64RegCache
 {
 public:
-	Arm64RegCache() : m_emit(nullptr), m_reg_stats(nullptr) {};
+	Arm64RegCache() : m_emit(nullptr), m_float_emit(nullptr), m_reg_stats(nullptr) {};
 	virtual ~Arm64RegCache() {};
 
 	void Init(ARM64XEmitter *emitter);
@@ -133,10 +133,14 @@ public:
 	// Will dump an immediate to the host register as well
 	virtual ARM64Reg R(u32 reg) = 0;
 
+	virtual BitSet32 GetCallerSavedUsed() = 0;
+
 	// Returns a temporary register for use
 	// Requires unlocking after done
 	ARM64Reg GetReg();
 
+	void StoreRegister(u32 preg) { FlushRegister(preg, false); }
+
 	// Locks a register so a cache cannot use it
 	// Useful for function calls
 	template
@@ -166,7 +170,7 @@ protected:
 	virtual void GetAllocationOrder() = 0;
 
 	// Flushes the most stale register
-	virtual void FlushMostStaleRegister() = 0;
+	void FlushMostStaleRegister();
 
 	// Lock a register
 	void LockRegister(ARM64Reg host_reg);
@@ -177,15 +181,31 @@ protected:
 	// Flushes a guest register by host provided
 	virtual void FlushByHost(ARM64Reg host_reg) = 0;
 
+	virtual void FlushRegister(u32 preg, bool maintain_state) = 0;
+
 	// Get available host registers
 	u32 GetUnlockedRegisterCount();
 
+	void IncrementAllUsed()
+	{
+		for (auto& reg : m_guest_registers)
+			reg.IncrementLastUsed();
+	}
+
 	// Code emitter
 	ARM64XEmitter *m_emit;
 
+	// Float emitter
+	std::unique_ptr<ARM64FloatEmitter> m_float_emit;
+
// Host side registers that hold the host registers in order of use std::vector m_host_registers; + // Our guest GPRs + // PowerPC has 32 GPRs + // PowerPC also has 32 paired FPRs + OpArg m_guest_registers[32]; + // Register stats for the current block PPCAnalyst::BlockRegStats *m_reg_stats; }; @@ -215,34 +235,20 @@ public: void BindToRegister(u32 preg, bool do_load); - void StoreRegister(u32 preg) { FlushRegister(preg, false); } - - BitSet32 GetCallerSavedUsed(); + BitSet32 GetCallerSavedUsed() override; protected: // Get the order of the host registers void GetAllocationOrder(); - // Flushes the most stale register - void FlushMostStaleRegister(); - // Flushes a guest register by host provided void FlushByHost(ARM64Reg host_reg) override; - // Our guest GPRs - // PowerPC has 32 GPRs - OpArg m_guest_registers[32]; + void FlushRegister(u32 preg, bool maintain_state) override; private: bool IsCalleeSaved(ARM64Reg reg); - void IncrementAllUsed() - { - for (auto& reg : m_guest_registers) - reg.IncrementLastUsed(); - } - - void FlushRegister(u32 preg, bool maintain_state); }; class Arm64FPRCache : public Arm64RegCache @@ -256,17 +262,19 @@ public: // Will dump an immediate to the host register as well ARM64Reg R(u32 preg); + void BindToRegister(u32 preg, bool do_load); + + BitSet32 GetCallerSavedUsed() override; + protected: // Get the order of the host registers void GetAllocationOrder(); - // Flushes the most stale register - void FlushMostStaleRegister(); - // Flushes a guest register by host provided void FlushByHost(ARM64Reg host_reg) override; - // Our guest FPRs - // Gekko has 32 paired registers(32x2) - OpArg m_guest_registers[32][2]; + void FlushRegister(u32 preg, bool maintain_state) override; + +private: + bool IsCalleeSaved(ARM64Reg reg); }; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index ea6ea6c46a..01df60015f 100644 --- 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -196,3 +196,90 @@ void JitArm64::twx(UGeckoInstruction inst) WriteExit(js.compilerPC + 4); } } + +void JitArm64::mfspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITSystemRegistersOff); + + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + switch (iIndex) + { + case SPR_XER: + case SPR_WPAR: + case SPR_DEC: + case SPR_TL: + case SPR_TU: + FALLBACK_IF(true); + default: + gpr.BindToRegister(inst.RD, false); + ARM64Reg RD = gpr.R(inst.RD); + LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); + break; + } +} + +void JitArm64::mftb(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITSystemRegistersOff); + mfspr(inst); +} + +void JitArm64::mtspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITSystemRegistersOff); + + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + + switch (iIndex) + { + case SPR_DMAU: + + case SPR_SPRG0: + case SPR_SPRG1: + case SPR_SPRG2: + case SPR_SPRG3: + + case SPR_SRR0: + case SPR_SRR1: + // These are safe to do the easy way, see the bottom of this function. + break; + + case SPR_LR: + case SPR_CTR: + case SPR_GQR0: + case SPR_GQR0 + 1: + case SPR_GQR0 + 2: + case SPR_GQR0 + 3: + case SPR_GQR0 + 4: + case SPR_GQR0 + 5: + case SPR_GQR0 + 6: + case SPR_GQR0 + 7: + // These are safe to do the easy way, see the bottom of this function. 
+ break; + case SPR_XER: + { + FALLBACK_IF(true); + ARM64Reg RD = gpr.R(inst.RD); + ARM64Reg WA = gpr.GetReg(); + ARM64Reg mask = gpr.GetReg(); + MOVI2R(mask, 0xFF7F); + AND(WA, RD, mask, ArithOption(mask, ST_LSL, 0)); + STRH(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_stringctrl)); + UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT); + STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + UBFM(WA, RD, XER_OV_SHIFT, 31); // Same as WA = RD >> XER_OV_SHIFT + STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov)); + gpr.Unlock(WA, mask); + } + break; + default: + FALLBACK_IF(true); + } + + // OK, this is easy. + ARM64Reg RD = gpr.R(inst.RD); + STR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 04ed7e7be6..f1087a27c8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -45,17 +45,17 @@ static GekkoOPTemplate primarytable[] = {3, &JitArm64::twx}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - {7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, + {7, &JitArm64::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, {8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {10, &JitArm64::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, &JitArm64::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, + {12, &JitArm64::addic}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {13, &JitArm64::addic}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, 
{14, &JitArm64::arith_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {15, &JitArm64::arith_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, - {21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {21, &JitArm64::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, {24, &JitArm64::arith_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, @@ -84,15 +84,15 @@ static GekkoOPTemplate primarytable[] = {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - {48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, - {49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - {50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, - {51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + {48, &JitArm64::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, + {49, &JitArm64::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + {50, &JitArm64::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, + {51, &JitArm64::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - {52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, - {53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, - {55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + {52, &JitArm64::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, + {53, &JitArm64::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + {54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, 
FL_IN_A}}, + {55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, {57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, @@ -114,39 +114,39 @@ static GekkoOPTemplate table4[] = { //SUBOP10 {0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, {32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, - {40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, - {136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, - {264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, + {40, &JitArm64::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, + {136, &JitArm64::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, + {264, &JitArm64::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, - {72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, + {72, &JitArm64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, - {528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, - {560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, - {592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, - {624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, + {528, &JitArm64::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, + {560, &JitArm64::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, + {592, &JitArm64::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, + {624, &JitArm64::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, {1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, }; static GekkoOPTemplate table4_2[] = { - {10, &JitArm64::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, 
- {11, &JitArm64::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, - {12, &JitArm64::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, - {13, &JitArm64::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, - {14, &JitArm64::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, - {15, &JitArm64::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, - {18, &JitArm64::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, - {20, &JitArm64::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, - {21, &JitArm64::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, - {23, &JitArm64::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, - {24, &JitArm64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, - {25, &JitArm64::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, + {10, &JitArm64::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}}, + {11, &JitArm64::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}}, + {12, &JitArm64::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}}, + {13, &JitArm64::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}}, + {14, &JitArm64::ps_madds0}, //"ps_madds0", OPTYPE_PS, 0}}, + {15, &JitArm64::ps_madds1}, //"ps_madds1", OPTYPE_PS, 0}}, + {18, &JitArm64::ps_div}, //"ps_div", OPTYPE_PS, 0, 16}}, + {20, &JitArm64::ps_sub}, //"ps_sub", OPTYPE_PS, 0}}, + {21, &JitArm64::ps_add}, //"ps_add", OPTYPE_PS, 0}}, + {23, &JitArm64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}}, + {24, &JitArm64::ps_res}, //"ps_res", OPTYPE_PS, 0}}, + {25, &JitArm64::ps_mul}, //"ps_mul", OPTYPE_PS, 0}}, {26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, - {28, &JitArm64::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, - {29, &JitArm64::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, - {30, &JitArm64::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, - {31, &JitArm64::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, + {28, &JitArm64::ps_msub}, //"ps_msub", OPTYPE_PS, 0}}, + {29, &JitArm64::ps_madd}, //"ps_madd", OPTYPE_PS, 0}}, + {30, &JitArm64::ps_nmsub}, //"ps_nmsub", 
OPTYPE_PS, 0}}, + {31, &JitArm64::ps_nmadd}, //"ps_nmadd", OPTYPE_PS, 0}}, }; @@ -196,7 +196,7 @@ static GekkoOPTemplate table31[] = {954, &JitArm64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {824, &JitArm64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, @@ -208,24 +208,24 @@ static GekkoOPTemplate table31[] = {1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, //load word - {23, &JitArm64::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {55, &JitArm64::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {23, &JitArm64::lXX}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {55, &JitArm64::lXX}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load halfword - {279, &JitArm64::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {311, &JitArm64::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {279, &JitArm64::lXX}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {311, &JitArm64::lXX}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load halfword signextend - {343, &JitArm64::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {375, &JitArm64::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, 
+ {343, &JitArm64::lXX}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {375, &JitArm64::lXX}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load byte - {87, &JitArm64::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {119, &JitArm64::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {87, &JitArm64::lXX}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {119, &JitArm64::lXX}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load byte reverse - {534, &JitArm64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {790, &JitArm64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {534, &JitArm64::lXX}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {790, &JitArm64::lXX}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, // Conditional load/store (Wii SMP) {150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, @@ -236,16 +236,16 @@ static GekkoOPTemplate table31[] = {597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, //store word - {151, &JitArm64::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, &JitArm64::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {151, &JitArm64::stX}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, &JitArm64::stX}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store halfword - {407, &JitArm64::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, &JitArm64::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {407, &JitArm64::stX}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, &JitArm64::stX}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store byte - {215, &JitArm64::FallBackToInterpreter}, 
//"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, &JitArm64::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {215, &JitArm64::stX}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, &JitArm64::stX}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store bytereverse {662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, @@ -255,15 +255,15 @@ static GekkoOPTemplate table31[] = {725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, // fp load/store - {535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - {599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + {535, &JitArm64::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {567, &JitArm64::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + {599, &JitArm64::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {631, &JitArm64::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - {663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {663, &JitArm64::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {695, &JitArm64::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {727, &JitArm64::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {759, &JitArm64::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, {983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, {19, 
&JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, @@ -272,9 +272,9 @@ static GekkoOPTemplate table31[] = {146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {210, &JitArm64::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}}, {242, &JitArm64::mtsrin}, //"mtsrin", OPTYPE_SYSTEM, 0}}, - {339, &JitArm64::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, - {467, &JitArm64::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, - {371, &JitArm64::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, + {339, &JitArm64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, + {467, &JitArm64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, + {371, &JitArm64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, {512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {595, &JitArm64::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &JitArm64::mfsrin}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, @@ -294,25 +294,25 @@ static GekkoOPTemplate table31[] = static GekkoOPTemplate table31_2[] = { - {266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {778, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {266, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {778, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {10, &JitArm64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {522, &JitArm64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {650, 
&JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {202, &JitArm64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {235, &JitArm64::mullwx}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {747, &JitArm64::mullwx}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {104, &JitArm64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {552, &JitArm64::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {40, &JitArm64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {552, &JitArm64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | 
FL_IN_AB | FL_RC_BIT}}, {8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, @@ -323,27 +323,27 @@ static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table59[] = { {18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, - {20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {20, &JitArm64::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm64::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, // {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, {24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm64::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm64::fmsubsx}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm64::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm64::fnmsubsx}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm64::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, }; static GekkoOPTemplate table63[] = { - {264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {264, &JitArm64::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {32, &JitArm64::FallBackToInterpreter}, 
//"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, {0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, - {72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, - {136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, + {72, &JitArm64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, + {136, &JitArm64::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {40, &JitArm64::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, {12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, {64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, @@ -357,16 +357,16 @@ static GekkoOPTemplate table63[] = static GekkoOPTemplate table63_2[] = { {18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, - {20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {20, &JitArm64::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm64::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, {22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, - {23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, + {23, &JitArm64::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm64::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, {26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", 
OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm64::fmsubx}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm64::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm64::fnmsubx}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm64::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, };