From 2c39d4044d3bd8b8d83a79c86937d6d5469f5432 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 18:08:40 -0600 Subject: [PATCH 1/7] [AArch64] Add loadstore paired emitter instructions. --- Source/Core/Common/Arm64Emitter.cpp | 50 +++++++++++++++++++++++++++++ Source/Core/Common/Arm64Emitter.h | 12 +++++++ Source/Core/Common/ArmEmitter.h | 2 +- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index ef976ab6d2..53405da955 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -491,6 +491,42 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i (immr << 16) | (imms << 10) | (Rn << 5) | Rd); } +void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) +{ + bool b64Bit = Is64Bit(Rt); + u32 type_encode = 0; + + switch (type) + { + case INDEX_UNSIGNED: + type_encode = 0b010; + break; + case INDEX_POST: + type_encode = 0b001; + break; + case INDEX_PRE: + type_encode = 0b011; + break; + } + + if (b64Bit) + { + op |= 0b10; + imm >>= 3; + } + else + { + imm >>= 2; + } + + Rt = DecodeReg(Rt); + Rt2 = DecodeReg(Rt2); + Rn = DecodeReg(Rn); + + Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | \ + ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt); +} + // FixupBranch branching void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) { @@ -1120,6 +1156,20 @@ void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm) EncodeLoadRegisterInst(3, Rt, imm); } +// Load/Store pair +void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) +{ + EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm); +} +void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) +{ + EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm); +} +void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) +{ + EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm); +} + // Load/Store Exclusive void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) { diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 427809d850..2c31f89eb9 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -299,6 +299,7 @@ private: void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend); void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); protected: inline void Write32(u32 value) @@ -313,6 +314,12 @@ public: { } + ARM64XEmitter(u8* code_ptr) { + m_code = code_ptr; + m_lastCacheFlushEnd = code_ptr; + m_startcode = code_ptr; + } + virtual ~ARM64XEmitter() { } @@ -539,6 +546,11 @@ public: void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); void PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + // Load/Store pair + void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); + void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); + void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); + // Wrapper around MOVZ+MOVK void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = 
true); }; diff --git a/Source/Core/Common/ArmEmitter.h b/Source/Core/Common/ArmEmitter.h index c4139b67c7..8aff8d8f38 100644 --- a/Source/Core/Common/ArmEmitter.h +++ b/Source/Core/Common/ArmEmitter.h @@ -351,7 +351,7 @@ public: ARMXEmitter() : code(nullptr), startcode(nullptr), lastCacheFlushEnd(nullptr) { condition = CC_AL << 28; } - ARMXEmitter(u8 *code_ptr) { + ARMXEmitter(u8* code_ptr) { code = code_ptr; lastCacheFlushEnd = code_ptr; startcode = code_ptr; From c3c80e94400653cd80db5cb30406d6ad1368cf4d Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 18:12:41 -0600 Subject: [PATCH 2/7] [AArch64] Improves the register cache. Removes the REG_AWAY nonsense I was doing. I've got to get the JIT more up to speed before thinking of insane register cache things. Also fixes a bug in immediate setting where if the register being set to an immediate already had a host register tied to it then it wouldn't free the register it had. Resulting in register exhaustion. --- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 221 ++---------------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 29 +-- 2 files changed, 23 insertions(+), 227 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 052d0ea9bf..d8d9e7ca07 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -63,45 +63,6 @@ void Arm64RegCache::UnlockRegister(ARM64Reg host_reg) // GPR Cache void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats) { - // To make this technique easy, let's just work on pairs of even/odd registers - // We could do simple odd/even as well to get a few spare temporary registers - // but it isn't really needed, we aren't starved for registers - for (int reg = 0; reg < 32; reg += 2) - { - u32 regs_used = (stats.IsUsed(reg) << 1) | stats.IsUsed(reg + 1); - switch (regs_used) - { - case 0x02: // Reg+0 used - { - ARM64Reg host_reg = GetReg(); - m_guest_registers[reg].LoadToReg(host_reg); - m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); - } - break; - case 0x01: // Reg+1 used - { - ARM64Reg host_reg = GetReg(); - m_guest_registers[reg + 1].LoadToReg(host_reg); - m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg + 1])); - } - break; - case 0x03: // Both registers used - { - // Get a 64bit host register - ARM64Reg host_reg = EncodeRegTo64(GetReg()); - m_guest_registers[reg].LoadToAway(host_reg, REG_LOW); - m_guest_registers[reg + 1].LoadToAway(host_reg, REG_HIGH); - - // host_reg is 64bit here. 
- // It'll load both guest_registers in one LDR - m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); - } - break; - case 0x00: // Neither used - default: - break; - } - } } bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) @@ -116,77 +77,34 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) void Arm64GPRCache::FlushRegister(u32 preg) { - u32 base_reg = preg; OpArg& reg = m_guest_registers[preg]; if (reg.GetType() == REG_REG) { ARM64Reg host_reg = reg.GetReg(); m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); - Unlock(host_reg); + UnlockRegister(host_reg); reg.Flush(); } else if (reg.GetType() == REG_IMM) { - ARM64Reg host_reg = GetReg(); - - m_emit->MOVI2R(host_reg, reg.GetImm()); - m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); - - Unlock(host_reg); - - reg.Flush(); - } - else if (reg.GetType() == REG_AWAY) - { - u32 next_reg = 0; - if (reg.GetAwayLocation() == REG_LOW) - next_reg = base_reg + 1; - else - next_reg = base_reg - 1; - OpArg& reg2 = m_guest_registers[next_reg]; - ARM64Reg host_reg = reg.GetAwayReg(); - ARM64Reg host_reg_1 = reg.GetReg(); - ARM64Reg host_reg_2 = reg2.GetReg(); - // Flush if either of these shared registers are used. - if (host_reg_1 == INVALID_REG) + if (!reg.GetImm()) { - // We never loaded this register - // We've got to test the state of our shared register - // Currently it is always reg+1 - if (host_reg_2 == INVALID_REG) - { - // We didn't load either of these registers - // This can happen in cases where we had to flush register state - // or if we hit an interpreted instruction before we could use it - // Dump the whole thing in one go and flush both registers - - // 64bit host register will store 2 32bit store registers in one go - if (reg.GetAwayLocation() == REG_LOW) - m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[base_reg])); - else - m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[next_reg])); - } - else - { - // Alright, bottom register isn't used, but top one is - // Only store the top one - m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[next_reg])); - Unlock(host_reg_2); - } + m_emit->STR(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(gpr[preg])); } else { - m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[base_reg])); - Unlock(host_reg_1); - } - // Flush both registers - reg.Flush(); - reg2.Flush(); - Unlock(DecodeReg(host_reg)); - } + ARM64Reg host_reg = GetReg(); + m_emit->MOVI2R(host_reg, reg.GetImm()); + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + + UnlockRegister(host_reg); + } + + reg.Flush(); + } } void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) @@ -196,11 +114,7 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) bool flush = true; if (mode == FLUSH_INTERPRETER) { - if (!(op->regsOut[0] == i || - op->regsOut[1] == i || - op->regsIn[0] == i || - op->regsIn[1] == i || - op->regsIn[2] == i)) + if (!(op->regsOut[i] || op->regsIn[i])) { // This interpreted instruction doesn't use this register flush = false; @@ -219,39 +133,6 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) if (flush) FlushRegister(i); } - else if (m_guest_registers[i].GetType() == REG_AWAY) - { - // We are away, that means that this register and the next are stored in a single 64bit register - // There is a very good chance that both the registers are out in some "temp" register - bool flush_2 = true; - if (mode == FLUSH_INTERPRETER) - { - if (!(op->regsOut[0] == (i + 1) || - op->regsOut[1] 
== (i + 1) || - op->regsIn[0] == (i + 1) || - op->regsIn[1] == (i + 1) || - op->regsIn[2] == (i + 1))) - { - // This interpreted instruction doesn't use this register - flush_2 = false; - } - } - - ARM64Reg host_reg = m_guest_registers[i].GetAwayReg(); - ARM64Reg host_reg_1 = m_guest_registers[i].GetReg(); - ARM64Reg host_reg_2 = m_guest_registers[i + 1].GetReg(); - // Flush if either of these shared registers are used. - if (flush || - flush_2 || - !IsCalleeSaved(host_reg) || - !IsCalleeSaved(host_reg_1) || - !IsCalleeSaved(host_reg_2)) - { - FlushRegister(i); // Will flush both pairs of registers - } - // Skip the next register since we've handled it here - ++i; - } } } @@ -274,71 +155,6 @@ ARM64Reg Arm64GPRCache::R(u32 preg) return host_reg; } break; - case REG_AWAY: // Register is away in a shared register - { - // Let's do the voodoo that we dodo - if (reg.GetReg() == INVALID_REG) - { - // Alright, we need to extract from our away register - // To our new 32bit register - if (reg.GetAwayLocation() == REG_LOW) - { - OpArg& upper_reg = m_guest_registers[preg + 1]; - if (upper_reg.GetType() == REG_REG) - { - // If the upper reg is already moved away, just claim this one as ours now - ARM64Reg host_reg = reg.GetAwayReg(); - reg.LoadToReg(DecodeReg(host_reg)); - return host_reg; - } - else - { - // Top register is still loaded - // Make sure to move to a new register - ARM64Reg host_reg = GetReg(); - ARM64Reg current_reg = reg.GetAwayReg(); - reg.LoadToReg(host_reg); - - // We are in the low bits - // Just move it over to the low bits of the new register - m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 0, 31); - return host_reg; - } - } - else - { - OpArg& lower_reg = m_guest_registers[preg - 1]; - if (lower_reg.GetType() == REG_REG) - { - // If the lower register is moved away, claim this one as ours - ARM64Reg host_reg = reg.GetAwayReg(); - reg.LoadToReg(DecodeReg(host_reg)); - - // Make sure to move our register from the high bits to the low bits - m_emit->UBFM(EncodeRegTo64(host_reg), host_reg, 32, 63); - return host_reg; - } - else - { - // Load this register in to the new low bits - // We are no longer away - ARM64Reg host_reg = GetReg(); - ARM64Reg current_reg = reg.GetAwayReg(); - reg.LoadToReg(host_reg); - - // We are in the high bits - m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 32, 63); - return host_reg; - } - } - } - else - { - // We've already moved to a valid place to work on - return reg.GetReg(); - } - } - break; case REG_NOTLOADED: // Register isn't loaded at /all/ { // This is a bit annoying. 
We try to keep these preloaded as much as possible @@ -357,6 +173,14 @@ ARM64Reg Arm64GPRCache::R(u32 preg) return INVALID_REG; } +void Arm64GPRCache::SetImmediate(u32 preg, u32 imm) +{ + OpArg& reg = m_guest_registers[preg]; + if (reg.GetType() == REG_REG) + Unlock(reg.GetReg()); + reg.LoadToImm(imm); +} + void Arm64GPRCache::GetAllocationOrder() { // Callee saved registers first in hopes that we will keep everything stored there first @@ -380,8 +204,7 @@ void Arm64GPRCache::FlushMostStaleRegister() { u32 last_used = m_guest_registers[i].GetLastUsed(); if (last_used > most_stale_amount && - m_guest_registers[i].GetType() != REG_IMM && - m_guest_registers[i].GetType() != REG_NOTLOADED) + m_guest_registers[i].GetType() == REG_REG) { most_stale_preg = i; most_stale_amount = last_used; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 1eb1afd3d2..9f1c341624 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -19,7 +19,6 @@ enum RegType REG_NOTLOADED = 0, REG_REG, // Reg type is register REG_IMM, // Reg is really a IMM - REG_AWAY, // Reg is away }; enum RegLocation { @@ -56,14 +55,6 @@ public: { return m_reg; } - ARM64Reg GetAwayReg() - { - return m_away_reg; - } - RegLocation GetAwayLocation() - { - return m_away_location; - } u32 GetImm() { return m_value; @@ -72,16 +63,6 @@ public: { m_type = REG_REG; m_reg = reg; - - m_away_reg = INVALID_REG; - } - void LoadToAway(ARM64Reg reg, RegLocation location) - { - m_type = REG_AWAY; - m_away_reg = reg; - m_away_location = location; - - m_reg = INVALID_REG; } void LoadToImm(u32 imm) { @@ -89,14 +70,12 @@ public: m_value = imm; m_reg = INVALID_REG; - m_away_reg = INVALID_REG; } void Flush() { // Invalidate any previous information m_type = REG_NOTLOADED; m_reg = INVALID_REG; - m_away_reg = INVALID_REG; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -111,12 +90,6 @@ private: RegType m_type; // store type ARM64Reg m_reg; // host register we are in - // For REG_AWAY - // Host register that we are away in - // This is a 64bit register - ARM64Reg m_away_reg; - RegLocation m_away_location; - // For REG_IMM u32 m_value; // IMM value @@ -227,7 +200,7 @@ public: ARM64Reg R(u32 preg); // Set a register to an immediate - void SetImmediate(u32 reg, u32 imm) { m_guest_registers[reg].LoadToImm(imm); } + void SetImmediate(u32 preg, u32 imm); // Returns if a register is set as an immediate bool IsImm(u32 reg) { return m_guest_registers[reg].GetType() == REG_IMM; } From ca04601b141bca5aea325aa2708c299ce964786b Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 19:07:31 -0600 Subject: [PATCH 3/7] [AArch64] Fixes the dispatcher Changes the dispatcher to make sure we are saving the LR (X30) to the stack. Also makes sure to keep the stack aligned, since AArch64's AAPCS64 mandates that the stack be quad-word aligned. Fixes an infinite loop in the dispatcher caused by a downcount check jumping to the dispatcher. This was because checking exceptions and the state pointer wouldn't reset the global conditional flags, so it would leave the timing/exception check, jump to the start of the dispatcher, and then jump straight back again due to the stale conditional branch.
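As a rough sketch of the resulting shape (this only annotates emitter calls that already appear in the diff below, in the order they are emitted; it is illustrative rather than the verbatim routine):

    SUB(SP, SP, 16);                  // prologue: keep SP 16-byte (quad-word) aligned per AAPCS64
    STR(INDEX_UNSIGNED, X30, SP, 0);  // spill LR (X30) so it survives the calls the dispatcher makes
    // ... dispatcher loop, downcount/exception/state checks ...
    CMP(W0, 0);                       // refresh NZCV before the exit branch; a bare CBNZ left the previous comparison's flags in place
    FixupBranch Exit = B(CC_NEQ);
    B(dispatcher);
    SetJumpTarget(Exit);
    LDR(INDEX_UNSIGNED, X30, SP, 0);  // epilogue: restore LR, release the stack slot, return
    ADD(SP, SP, 16);
    RET(X30);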
--- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index ee36b50e63..0cc89722cc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -15,6 +15,9 @@ void JitArm64AsmRoutineManager::Generate() { enterCode = GetCodePtr(); + SUB(SP, SP, 16); + STR(INDEX_UNSIGNED, X30, SP, 0); + MOVI2R(X29, (u64)&PowerPC::ppcState); dispatcher = GetCodePtr(); @@ -64,14 +67,20 @@ void JitArm64AsmRoutineManager::Generate() // Check the state pointer to see if we are exiting // Gets checked on every exception check - MOVI2R(W0, (u64)PowerPC::GetStatePtr()); - LDR(INDEX_UNSIGNED, W0, W0, 0); - FixupBranch Exit = CBNZ(W0); + MOVI2R(X0, (u64)PowerPC::GetStatePtr()); + LDR(INDEX_UNSIGNED, W0, X0, 0); + + CMP(W0, 0); + FixupBranch Exit = B(CC_NEQ); B(dispatcher); SetJumpTarget(Exit); + LDR(INDEX_UNSIGNED, X30, SP, 0); + ADD(SP, SP, 16); + RET(X30); + FlushIcache(); } From 51ad7981050f6891bef18a73a96907663116e8b5 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 19:13:28 -0600 Subject: [PATCH 4/7] [AArch64] Implements block linking. Before block linking was enabled but it wasn't ever implemented. Implements link blocks and destroy block functions and moves the downcount check in the WriteExit function so it doesn't get overwritten when linking. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 8 ++++---- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp | 9 +++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index d055b24d78..4a94cec890 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -97,6 +97,8 @@ void JitArm64::DoDownCount() // Exits void JitArm64::WriteExit(u32 destination) { + DoDownCount(); + //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; JitBlock::LinkData linkData; @@ -104,8 +106,6 @@ void JitArm64::WriteExit(u32 destination) linkData.exitPtrs = GetWritableCodePtr(); linkData.linkStatus = false; - DoDownCount(); - // Link opportunity! 
int block; if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) @@ -163,13 +163,12 @@ void JitArm64::SingleStep() pExecAddr(); } -void JitArm64::Jit(u32 em_address) +void JitArm64::Jit(u32) { if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache) { ClearCache(); } - int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); JitBlock *b = blocks.GetBlock(block_num); const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); @@ -282,6 +281,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = code_block.m_num_instructions; + FlushIcache(); return start; } diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 05aeb0609c..d52e7c5ba4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -47,7 +47,7 @@ public: void Run(); void SingleStep(); - void Jit(u32 em_address); + void Jit(u32); const char *GetName() { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index 55c319a7cc..89d2c40bf4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -8,9 +8,18 @@ void JitArm64BlockCache::WriteLinkBlock(u8* location, const u8* address) { + ARM64XEmitter emit(location); + emit.B(address); + emit.FlushIcache(); } void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address) { + ARM64XEmitter emit((u8 *)location); + emit.MOVI2R(W0, address); + emit.MOVI2R(X30, (u64)jit->GetAsmRoutines()->dispatcher); + emit.STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + emit.BR(X30); + emit.FlushIcache(); } From 8dfb8d8ad5c94411c1370c20e5c1f9e89d95a057 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 19:15:19 -0600 Subject: [PATCH 5/7] [AArch64] Implements HLE function injection --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4a94cec890..7ea0c16d65 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -59,8 +59,17 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) void JitArm64::HLEFunction(UGeckoInstruction inst) { - WARN_LOG(DYNA_REC, "HLEFunction %08x - Fix me ;)", inst.hex); - exit(0); + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + MOVI2R(W0, js.compilerPC); + MOVI2R(W1, inst.hex); + MOVI2R(X30, (u64)&HLE::Execute); + BLR(X30); + + ARM64Reg WA = gpr.GetReg(); + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + WriteExitDestInR(WA); } void JitArm64::DoNothing(UGeckoInstruction inst) From 71b77f317332618957b6d9a1728c815b3ae5fe4e Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 19:16:03 -0600 Subject: [PATCH 6/7] [AArch64] Makes some integer instructions clearer about what they're doing. ComputeRC was a bit unclear: it used a 64bit register for setting the immediate and then called SXTW with a 64bit source register, which is just a bit obscure. When the source register is an immediate in cntlzwx, just use the built-in GCC function for counting leading zeros instead of our own implementation.
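For reference, the constant fold amounts to a host-side count-leading-zeros. A minimal sketch (hypothetical helper name; note that __builtin_clz is undefined for an input of 0, whereas PowerPC defines cntlzw of 0 as 32, so a fully general fold needs to special-case zero):

    #include <cstdint>
    using u32 = std::uint32_t; // Dolphin's typedef

    // cntlzw rA, rS with rS known at compile time.
    static u32 CountLeadingZeros32(u32 value)
    {
        // __builtin_clz(0) is undefined behaviour, so handle 0 explicitly.
        return value ? static_cast<u32>(__builtin_clz(value)) : 32;
    }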
--- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index ca3db467f0..84c00449be 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -22,8 +22,8 @@ void JitArm64::ComputeRC(u32 d) if (gpr.IsImm(d)) { - MOVI2R(XA, gpr.GetImm(d)); - SXTW(XA, XA); + MOVI2R(WA, gpr.GetImm(d)); + SXTW(XA, WA); } else { @@ -252,20 +252,9 @@ void JitArm64::cntlzwx(UGeckoInstruction inst) int s = inst.RS; if (gpr.IsImm(s)) - { - u32 mask = 0x80000000; - u32 i = 0; - for (; i < 32; i++, mask >>= 1) - { - if ((u32)gpr.GetImm(s) & mask) - break; - } - gpr.SetImmediate(a, i); - } + gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s))); else - { CLZ(gpr.R(a), gpr.R(s)); - } if (inst.Rc) ComputeRC(a); From 71e4e67ae13e48f8322726d8e73920fa4806031b Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 2 Dec 2014 19:18:27 -0600 Subject: [PATCH 7/7] [AArch64] Only flush registers that are needed with interpreter fallback. We try to keep as many guest registers as possible in callee-saved host registers, so if a guest register is already sitting in the right place and the interpreter call we are falling back to doesn't need it, we can leave it alone and only dump the registers the instruction actually uses. This means we don't have to dump 100% of our register state when falling back to the interpreter. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 7ea0c16d65..97f0b00336 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -49,8 +49,8 @@ void JitArm64::unknown_instruction(UGeckoInstruction inst) void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) { - gpr.Flush(FlushMode::FLUSH_ALL); - fpr.Flush(FlushMode::FLUSH_ALL); + gpr.Flush(FlushMode::FLUSH_INTERPRETER, js.op); + fpr.Flush(FlushMode::FLUSH_INTERPRETER, js.op); Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); MOVI2R(W0, inst.hex); MOVI2R(X30, (u64)instr);
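For context, a condensed sketch of the selective flush this relies on (the real logic is Arm64GPRCache::Flush from the register cache patch earlier in this series; this sketch drops the immediate-value handling):

    // FLUSH_INTERPRETER: only store back guest GPRs the interpreted
    // instruction actually reads or writes; everything else stays live in
    // its callee-saved host register across the interpreter call.
    void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
    {
        for (int i = 0; i < 32; ++i)
        {
            bool flush = true;
            if (mode == FLUSH_INTERPRETER)
                flush = op->regsOut[i] || op->regsIn[i];

            if (flush && m_guest_registers[i].GetType() == REG_REG)
                FlushRegister(i); // STR back to ppcState and free the host register
        }
    }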