From 5a28883f9ea65136f9747009d01b5a57e04d4e51 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 15:09:07 -0600
Subject: [PATCH] [AArch64] Implements 15 floating loadstores.

---
 Source/Core/Core/CMakeLists.txt               |   1 +
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp     |  16 +
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |   4 +
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   | 406 ++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp |  32 +-
 5 files changed, 443 insertions(+), 16 deletions(-)
 create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp

diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 61462124dd..7cbf4509f6 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -228,6 +228,7 @@ elseif(_M_ARM_64)
 		PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
 		PowerPC/JitArm64/JitArm64_Integer.cpp
 		PowerPC/JitArm64/JitArm64_LoadStore.cpp
+		PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
 		PowerPC/JitArm64/JitArm64_Paired.cpp
 		PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
 		PowerPC/JitArm64/JitArm64_Tables.cpp)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 6c44411349..29cbb62e1d 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -16,6 +16,7 @@ void JitArm64::Init()
 {
 	AllocCodeSpace(CODE_SIZE);
 	jo.enableBlocklink = true;
+	jo.optimizeGatherPipe = true;
 	gpr.Init(this);
 	fpr.Init(this);
 
@@ -289,6 +290,21 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 			js.next_compilerPC = ops[i + 1].address;
 		}
 
+		if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
+		{
+			js.fifoBytesThisBlock -= 32;
+
+			gpr.Lock(W30);
+			BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+			regs_in_use[W30] = 0;
+
+			ABI_PushRegisters(regs_in_use);
+			MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe);
+			BLR(X30);
+			ABI_PopRegisters(regs_in_use);
+			gpr.Unlock(W30);
+		}
+
 		if (!ops[i].skip)
 		{
 			if (js.memcheck && (opinfo->flags & FL_USE_FPU))
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index c979633ae2..94e9e945eb 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -117,6 +117,10 @@ public:
 	void lXX(UGeckoInstruction inst);
 	void stX(UGeckoInstruction inst);
 
+	// LoadStore floating point
+	void lfXX(UGeckoInstruction inst);
+	void stfXX(UGeckoInstruction inst);
+
 	// Floating point
 	void fabsx(UGeckoInstruction inst);
 	void faddsx(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
new file mode 100644
index 0000000000..49c40a905a
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -0,0 +1,406 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "Common/Arm64Emitter.h"
+#include "Common/Common.h"
+
+#include "Core/Core.h"
+#include "Core/CoreTiming.h"
+#include "Core/PowerPC/PowerPC.h"
+#include "Core/PowerPC/PPCTables.h"
+#include "Core/PowerPC/JitArm64/Jit.h"
+#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
+#include "Core/PowerPC/JitArm64/JitAsm.h"
+
+using namespace Arm64Gen;
+
+// Emits code for the floating point load instructions decoded below
+// (lfs, lfsu, lfd, lfdu, lfsx, lfsux, lfdx, lfdux): works out the effective
+// address, writes it back to RA for the update forms, then loads into FD
+// through EmitBackpatchRoutine.
+void JitArm64::lfXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff);
+
+	u32 a = inst.RA, b = inst.RB;
+
+	s32 offset = inst.SIMM_16;
+	u32 flags = BackPatchInfo::FLAG_LOAD;
+	bool update = false;
+	s32 offset_reg = -1;
+
+	switch (inst.OPCD)
+	{
+		case 31:
+			switch (inst.SUBOP10)
+			{
+				case 567: // lfsux
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					update = true;
+					offset_reg = b;
+				break;
+				case 535: // lfsx
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					offset_reg = b;
+				break;
+				case 631: // lfdux
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					update = true;
+					offset_reg = b;
+				break;
+				case 599: // lfdx
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					offset_reg = b;
+				break;
+			}
+		break;
+		case 49: // lfsu
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+			update = true;
+		break;
+		case 48: // lfs
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+		break;
+		case 51: // lfdu
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+			update = true;
+		break;
+		case 50: // lfd
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+		break;
+	}
+
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	ARM64Reg VD = fpr.R(inst.FD);
+	ARM64Reg addr_reg = W0;
+
+	gpr.Lock(W0, W30);
+	fpr.Lock(Q0);
+
+	if (update)
+	{
+		// Always uses RA
+		if (gpr.IsImm(a) && offset_reg == -1)
+		{
+			is_immediate = true;
+			imm_addr = offset + gpr.GetImm(a);
+		}
+		else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
+		{
+			is_immediate = true;
+			imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+		}
+		else
+		{
+			if (offset_reg == -1)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
+			}
+		}
+	}
+	else
+	{
+		if (offset_reg == -1)
+		{
+			if (a && gpr.IsImm(a))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + offset;
+			}
+			else if (a)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				is_immediate = true;
+				imm_addr = offset;
+			}
+		}
+		else
+		{
+			if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+			}
+			else if (!a && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(offset_reg);
+			}
+			else if (a)
+			{
+				ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
+			}
+			else
+			{
+				MOV(addr_reg, gpr.R(offset_reg));
+			}
+		}
+	}
+
+	ARM64Reg XA = EncodeRegTo64(addr_reg);
+
+	if (is_immediate)
+		MOVI2R(XA, imm_addr);
+
+	if (update)
+		MOV(gpr.R(a), addr_reg);
+
+	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+	BitSet32 fpr_ignore_mask(0);
+	regs_in_use[W0] = 0;
+	regs_in_use[W30] = 0;
+	fprs_in_use[0] = 0; // Q0
+	// NOTE(review): presumably keeps the pop below from overwriting the value just loaded into VD - confirm
+	fpr_ignore_mask[VD - Q0] = 1;
+
+	if (is_immediate && Memory::IsRAMAddress(imm_addr))
+	{
+		EmitBackpatchRoutine(this, flags, true, false, VD, XA);
+	}
+	else
+	{
+		// Has a chance of being backpatched which will destroy our state
+		// push and pop everything in this instance
+		ABI_PushRegisters(regs_in_use);
+		m_float_emit.ABI_PushRegisters(fprs_in_use);
+		EmitBackpatchRoutine(this, flags,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			VD, XA);
+		m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask);
+		ABI_PopRegisters(regs_in_use);
+	}
+
+	gpr.Unlock(W0, W30);
+	fpr.Unlock(Q0);
+}
+
+// Emits code for the floating point store instructions decoded below
+// (stfs, stfsu, stfd, stfdu, stfsx, stfsux, stfdx, stfdux): works out the
+// effective address, writes it back to RA for the update forms, then stores FS
+// either straight into the gather pipe buffer or through EmitBackpatchRoutine.
+void JitArm64::stfXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff);
+
+	u32 a = inst.RA, b = inst.RB;
+
+	s32 offset = inst.SIMM_16;
+	u32 flags = BackPatchInfo::FLAG_STORE;
+	bool update = false;
+	s32 offset_reg = -1;
+
+	switch (inst.OPCD)
+	{
+		case 31:
+			switch (inst.SUBOP10)
+			{
+				case 663: // stfsx
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					offset_reg = b;
+				break;
+				case 695: // stfsux
+					flags |= BackPatchInfo::FLAG_SIZE_F32;
+					update = true;
+					offset_reg = b;
+				break;
+				case 727: // stfdx
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					offset_reg = b;
+				break;
+				case 759: // stfdux
+					flags |= BackPatchInfo::FLAG_SIZE_F64;
+					update = true;
+					offset_reg = b;
+				break;
+			}
+		break;
+		case 53: // stfsu
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+			update = true;
+		break;
+		case 52: // stfs
+			flags |= BackPatchInfo::FLAG_SIZE_F32;
+		break;
+		case 55: // stfdu
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+			update = true;
+		break;
+		case 54: // stfd
+			flags |= BackPatchInfo::FLAG_SIZE_F64;
+		break;
+	}
+
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	ARM64Reg V0 = fpr.R(inst.FS);
+	ARM64Reg addr_reg;
+	if (flags & BackPatchInfo::FLAG_SIZE_F64)
+		addr_reg = W0;
+	else
+		addr_reg = W1;
+
+	gpr.Lock(W0, W1, W30);
+	fpr.Lock(Q0);
+
+	if (update)
+	{
+		// Always uses RA
+		if (gpr.IsImm(a) && offset_reg == -1)
+		{
+			is_immediate = true;
+			imm_addr = offset + gpr.GetImm(a);
+		}
+		else if (gpr.IsImm(a) && offset_reg != -1 && gpr.IsImm(offset_reg))
+		{
+			is_immediate = true;
+			imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+		}
+		else
+		{
+			if (offset_reg == -1)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				ADD(addr_reg, gpr.R(offset_reg), gpr.R(a));
+			}
+		}
+	}
+	else
+	{
+		if (offset_reg == -1)
+		{
+			if (a && gpr.IsImm(a))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + offset;
+			}
+			else if (a)
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, gpr.R(a));
+			}
+			else
+			{
+				is_immediate = true;
+				imm_addr = offset;
+			}
+		}
+		else
+		{
+			if (a && gpr.IsImm(a) && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(a) + gpr.GetImm(offset_reg);
+			}
+			else if (!a && gpr.IsImm(offset_reg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(offset_reg);
+			}
+			else if (a)
+			{
+				ADD(addr_reg, gpr.R(a), gpr.R(offset_reg));
+			}
+			else
+			{
+				MOV(addr_reg, gpr.R(offset_reg));
+			}
+		}
+	}
+
+	ARM64Reg XA = EncodeRegTo64(addr_reg);
+
+	if (is_immediate)
+		MOVI2R(XA, imm_addr);
+
+	if (update)
+		MOV(gpr.R(a), addr_reg);
+
+	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+	regs_in_use[W0] = 0;
+	regs_in_use[W1] = 0;
+	regs_in_use[W30] = 0;
+	fprs_in_use[0] = 0; // Q0
+
+	if (is_immediate)
+	{
+		if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
+		{
+			int accessSize;
+			if (flags & BackPatchInfo::FLAG_SIZE_F64)
+				accessSize = 64;
+			else
+				accessSize = 32;
+
+			// Write the byte-reversed value at m_gatherPipe + m_gatherPipeCount,
+			// then advance m_gatherPipeCount by the access size in bytes.
+			MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
+			MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
+			LDR(INDEX_UNSIGNED, W0, X30, 0);
+			ADD(X1, X1, X0);
+			if (accessSize == 64)
+			{
+				m_float_emit.REV64(8, Q0, V0);
+				m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
+			}
+			else if (accessSize == 32)
+			{
+				m_float_emit.FCVT(32, 64, Q0, V0);
+				m_float_emit.REV32(8, D0, D0);
+				m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
+			}
+			ADD(W0, W0, accessSize >> 3);
+			STR(INDEX_UNSIGNED, W0, X30, 0);
+			jit->js.fifoBytesThisBlock += accessSize >> 3;
+
+		}
+		else if (Memory::IsRAMAddress(imm_addr))
+		{
+			EmitBackpatchRoutine(this, flags, true, false, V0, XA);
+		}
+		else
+		{
+			ABI_PushRegisters(regs_in_use);
+			m_float_emit.ABI_PushRegisters(fprs_in_use);
+			EmitBackpatchRoutine(this, flags, false, false, V0, XA);
+			m_float_emit.ABI_PopRegisters(fprs_in_use);
+			ABI_PopRegisters(regs_in_use);
+		}
+	}
+	else
+	{
+		// Has a chance of being backpatched which will destroy our state
+		// push and pop everything in this instance
+		ABI_PushRegisters(regs_in_use);
+		m_float_emit.ABI_PushRegisters(fprs_in_use);
+		EmitBackpatchRoutine(this, flags,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			V0, XA);
+		m_float_emit.ABI_PopRegisters(fprs_in_use);
+		ABI_PopRegisters(regs_in_use);
+	}
+	gpr.Unlock(W0, W1, W30);
+	fpr.Unlock(Q0);
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 3d67cb4d03..f1087a27c8 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -84,15 +84,15 @@ static GekkoOPTemplate primarytable[] =
 	{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
 	{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
 
-	{48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
-	{49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
-	{50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
-	{51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
+	{48, &JitArm64::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
+	{49, &JitArm64::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
+	{50, &JitArm64::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
+	{51, &JitArm64::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
 
-	{52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
-	{53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
-	{54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
-	{55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
+	{52, &JitArm64::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
+	{53, &JitArm64::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
+	{54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
+	{55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
 
 	{56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
 	{57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
@@ -255,15 +255,15 @@ static GekkoOPTemplate table31[] =
 	{725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
 
 	// fp load/store
-	{535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
-	{567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
-	{599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
-	{631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
+	{535, &JitArm64::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
+	{567, &JitArm64::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
+	{599, &JitArm64::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
+	{631, &JitArm64::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
 
-	{663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
-	{695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
-	{727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
-	{759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
+	{663, &JitArm64::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
+	{695, &JitArm64::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
+	{727, &JitArm64::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
+	{759, &JitArm64::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
 	{983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
 
 	{19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},