diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index b0b207dea3..95bf4c35f8 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -97,6 +97,8 @@ public:
 
 	// LoadStore
 	void icbi(UGeckoInstruction inst);
+	void lXX(UGeckoInstruction inst);
+	void stX(UGeckoInstruction inst);
 
 private:
 	Arm64GPRCache gpr;
@@ -114,6 +116,9 @@ private:
 
 	bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
 	void InitBackpatch();
 	u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
+	// Loadstore routines
+	void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
+	void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
 
 	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 783a9ce78b..58c1523897 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -17,6 +17,416 @@ using namespace Arm64Gen;
 
 void JitArm64::icbi(UGeckoInstruction inst)
 {
+	gpr.Flush(FlushMode::FLUSH_ALL);
+	fpr.Flush(FlushMode::FLUSH_ALL);
+
 	FallBackToInterpreter(inst);
 	WriteExit(js.compilerPC + 4);
 }
+
+void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
+{
+	// Make sure we don't get LR as a temp register
+	gpr.Lock(W0, W30);
+
+	gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg);
+	ARM64Reg dest_reg = gpr.R(dest);
+	ARM64Reg up_reg = INVALID_REG;
+	ARM64Reg off_reg = INVALID_REG;
+
+	if (addr != -1 && !gpr.IsImm(addr))
+		up_reg = gpr.R(addr);
+
+	if (offsetReg != -1 && !gpr.IsImm(offsetReg))
+		off_reg = gpr.R(offsetReg);
+
+	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+	BitSet32 ignore_mask(0);
+	regs_in_use[W0] = 0;
+	regs_in_use[W30] = 0;
+	ignore_mask[dest_reg] = 1;
+
+	ARM64Reg addr_reg = W0;
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	if (offsetReg == -1)
+	{
+		if (addr != -1)
+		{
+			if (gpr.IsImm(addr))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(addr) + offset;
+			}
+			else
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, up_reg);
+			}
+		}
+		else
+		{
+			is_immediate = true;
+			imm_addr = offset;
+		}
+	}
+	else
+	{
+		if (addr != -1)
+		{
+			if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
+			}
+			else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
+			{
+				MOVI2R(addr_reg, gpr.GetImm(addr));
+				ADD(addr_reg, addr_reg, off_reg);
+			}
+			else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
+			{
+				MOVI2R(addr_reg, gpr.GetImm(offsetReg));
+				ADD(addr_reg, addr_reg, up_reg);
+			}
+			else
+			{
+				ADD(addr_reg, up_reg, off_reg);
+			}
+		}
+		else
+		{
+			if (gpr.IsImm(offsetReg))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(offsetReg);
+			}
+			else
+			{
+				MOV(addr_reg, off_reg);
+			}
+		}
+	}
+
+	ARM64Reg XA = EncodeRegTo64(addr_reg);
+
+	if (is_immediate)
+		MOVI2R(XA, imm_addr);
+
+	if (is_immediate && Memory::IsRAMAddress(imm_addr))
+	{
+		EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
+
+		if (update)
+			MOVI2R(up_reg, imm_addr);
+	}
+	else
+	{
+		if (update)
+			MOV(up_reg, addr_reg);
+
+		// This has a chance of being backpatched, which would destroy our state,
+		// so push and pop everything in this instance
+		ABI_PushRegisters(regs_in_use);
+		EmitBackpatchRoutine(this, flags,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			dest_reg, XA);
+		ABI_PopRegisters(regs_in_use, ignore_mask);
+	}
+
+	gpr.Unlock(W0, W30);
+}
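The bulk of SafeLoadToReg is compile-time folding of the effective address: when every operand is a known immediate, the JIT computes the EA itself and can go straight to fastmem; otherwise it materializes the sum in a scratch register. A minimal standalone sketch of the same decision tree, where the EA struct and the Known callback are hypothetical stand-ins for the register cache's gpr.IsImm()/gpr.GetImm() queries:

#include <cstdint>
#include <optional>

// Hypothetical model of the EA folding in SafeLoadToReg/SafeStoreFromReg.
// known(r) yields the register's value if the cache holds it as an
// immediate; addr/offsetReg use -1 for "operand not present", as in the diff.
struct EA
{
  bool is_immediate;  // address fully known at compile time
  uint32_t imm_addr;  // only meaningful when is_immediate is true
};

using Known = std::optional<uint32_t> (*)(int reg);

EA ComputeEA(int addr, int offsetReg, int32_t offset, Known known)
{
  if (offsetReg == -1)  // D-form: EA = (rA|0) + SIMM
  {
    if (addr == -1)
      return {true, static_cast<uint32_t>(offset)};
    if (std::optional<uint32_t> ra = known(addr))
      return {true, *ra + offset};
    return {false, 0};  // emit MOVI2R(offset) + ADD(rA) at runtime
  }
  // X-form: EA = (rA|0) + rB
  std::optional<uint32_t> ra = addr != -1 ? known(addr) : std::nullopt;
  std::optional<uint32_t> rb = known(offsetReg);
  if (addr == -1)
    return rb ? EA{true, *rb} : EA{false, 0};
  if (ra && rb)
    return {true, *ra + *rb};  // both constant: fold completely
  return {false, 0};           // at least one runtime ADD is needed
}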
+
+void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
+{
+	// Make sure we don't get LR as a temp register
+	gpr.Lock(W0, W1, W30);
+
+	ARM64Reg RS = gpr.R(value);
+
+	ARM64Reg reg_dest = INVALID_REG;
+	ARM64Reg reg_off = INVALID_REG;
+
+	if (regOffset != -1 && !gpr.IsImm(regOffset))
+		reg_off = gpr.R(regOffset);
+	if (dest != -1 && !gpr.IsImm(dest))
+		reg_dest = gpr.R(dest);
+
+	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+	regs_in_use[W0] = 0;
+	regs_in_use[W1] = 0;
+	regs_in_use[W30] = 0;
+
+	ARM64Reg addr_reg = W1;
+
+	u32 imm_addr = 0;
+	bool is_immediate = false;
+
+	if (regOffset == -1)
+	{
+		if (dest != -1)
+		{
+			if (gpr.IsImm(dest))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(dest) + offset;
+			}
+			else
+			{
+				MOVI2R(addr_reg, offset);
+				ADD(addr_reg, addr_reg, reg_dest);
+			}
+		}
+		else
+		{
+			is_immediate = true;
+			imm_addr = offset;
+		}
+	}
+	else
+	{
+		if (dest != -1)
+		{
+			if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
+			}
+			else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
+			{
+				MOVI2R(addr_reg, gpr.GetImm(dest));
+				ADD(addr_reg, addr_reg, reg_off);
+			}
+			else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
+			{
+				MOVI2R(addr_reg, gpr.GetImm(regOffset));
+				ADD(addr_reg, addr_reg, reg_dest);
+			}
+			else
+			{
+				ADD(addr_reg, reg_dest, reg_off);
+			}
+		}
+		else
+		{
+			if (gpr.IsImm(regOffset))
+			{
+				is_immediate = true;
+				imm_addr = gpr.GetImm(regOffset);
+			}
+			else
+			{
+				MOV(addr_reg, reg_off);
+			}
+		}
+	}
+
+	ARM64Reg XA = EncodeRegTo64(addr_reg);
+
+	if (is_immediate)
+		MOVI2R(XA, imm_addr);
+
+	if (is_immediate && Memory::IsRAMAddress(imm_addr))
+	{
+		EmitBackpatchRoutine(this, flags, true, false, RS, XA);
+	}
+	else
+	{
+		// This has a chance of being backpatched, which would destroy our state,
+		// so push and pop everything in this instance
+		ABI_PushRegisters(regs_in_use);
+		EmitBackpatchRoutine(this, flags,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
+			RS, XA);
+		ABI_PopRegisters(regs_in_use);
+	}
+
+	gpr.Unlock(W0, W1, W30);
+}
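Both helpers hand the actual access off to EmitBackpatchRoutine, driven by a flags word that the lXX/stX dispatchers below compose from exactly one size bit plus optional behavior bits. A sketch of that composition, assuming the BackPatchInfo flags are one-hot masks (the names come from this diff; the concrete values are illustrative):

#include <cstdint>

// Assumed one-hot values; only the names are taken from the diff.
namespace BackPatchInfo
{
  constexpr uint32_t FLAG_LOAD    = 1 << 0;
  constexpr uint32_t FLAG_STORE   = 1 << 1;
  constexpr uint32_t FLAG_SIZE_8  = 1 << 2;
  constexpr uint32_t FLAG_SIZE_16 = 1 << 3;
  constexpr uint32_t FLAG_SIZE_32 = 1 << 4;
  constexpr uint32_t FLAG_REVERSE = 1 << 5;  // byte-swapped access (lwbrx/lhbrx)
  constexpr uint32_t FLAG_EXTEND  = 1 << 6;  // sign-extend the loaded value (lha/lhax)
}

// lhax = 16-bit load, sign-extended; sth = plain 16-bit store.
constexpr uint32_t kLhaxFlags = BackPatchInfo::FLAG_LOAD |
                                BackPatchInfo::FLAG_EXTEND |
                                BackPatchInfo::FLAG_SIZE_16;
constexpr uint32_t kSthFlags  = BackPatchInfo::FLAG_STORE |
                                BackPatchInfo::FLAG_SIZE_16;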
+
+void JitArm64::lXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreOff);
+
+	u32 a = inst.RA, b = inst.RB, d = inst.RD;
+	s32 offset = inst.SIMM_16;
+	s32 offsetReg = -1;
+	u32 flags = BackPatchInfo::FLAG_LOAD;
+	bool update = false;
+
+	switch (inst.OPCD)
+	{
+	case 31:
+		switch (inst.SUBOP10)
+		{
+		case 55: // lwzux
+			update = true;
+		case 23: // lwzx
+			flags |= BackPatchInfo::FLAG_SIZE_32;
+			offsetReg = b;
+			break;
+		case 119: // lbzux
+			update = true;
+		case 87: // lbzx
+			flags |= BackPatchInfo::FLAG_SIZE_8;
+			offsetReg = b;
+			break;
+		case 311: // lhzux
+			update = true;
+		case 279: // lhzx
+			flags |= BackPatchInfo::FLAG_SIZE_16;
+			offsetReg = b;
+			break;
+		case 375: // lhaux
+			update = true;
+		case 343: // lhax
+			flags |= BackPatchInfo::FLAG_EXTEND |
+				BackPatchInfo::FLAG_SIZE_16;
+			offsetReg = b;
+			break;
+		case 534: // lwbrx
+			flags |= BackPatchInfo::FLAG_REVERSE |
+				BackPatchInfo::FLAG_SIZE_32;
+			break;
+		case 790: // lhbrx
+			flags |= BackPatchInfo::FLAG_REVERSE |
+				BackPatchInfo::FLAG_SIZE_16;
+			break;
+		}
+		break;
+	case 33: // lwzu
+		update = true;
+	case 32: // lwz
+		flags |= BackPatchInfo::FLAG_SIZE_32;
+		break;
+	case 35: // lbzu
+		update = true;
+	case 34: // lbz
+		flags |= BackPatchInfo::FLAG_SIZE_8;
+		break;
+	case 41: // lhzu
+		update = true;
+	case 40: // lhz
+		flags |= BackPatchInfo::FLAG_SIZE_16;
+		break;
+	case 43: // lhau
+		update = true;
+	case 42: // lha
+		flags |= BackPatchInfo::FLAG_EXTEND |
+			BackPatchInfo::FLAG_SIZE_16;
+		break;
+	}
+
+	// Update forms aren't handled below yet; defer them to the interpreter
+	FALLBACK_IF(update);
+
+	SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update);
+
+	// LWZ idle skipping
+	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
+		inst.OPCD == 32 &&
+		(inst.hex & 0xFFFF0000) == 0x800D0000 &&
+		(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
+		(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
+		Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
+	{
+		// if it's still 0, we can wait until the next event
+		FixupBranch noIdle = CBNZ(gpr.R(d));
+
+		gpr.Flush(FLUSH_MAINTAIN_STATE);
+		fpr.Flush(FLUSH_MAINTAIN_STATE);
+
+		ARM64Reg WA = gpr.GetReg();
+		ARM64Reg XA = EncodeRegTo64(WA);
+
+		MOVI2R(XA, (u64)&PowerPC::OnIdle);
+		MOVI2R(W0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
+		BLR(XA);
+
+		gpr.Unlock(WA);
+		WriteExceptionExit();
+
+		SetJumpTarget(noIdle);
+
+		//js.compilerPC += 8;
+		return;
+	}
+}
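The idle-skip branch above fires only on a specific three-instruction polling loop. Decoded from the masks in the condition, the pattern is lwz r0, simm(r13) followed by cmplwi r0, 0 (cmpwi on Wii) and beq -8. A standalone form of the same predicate for reference (IsIdleLoop is a name invented for illustration; the constants are the ones used in the diff):

#include <cstdint>

// Hypothetical standalone version of the idle-loop match in JitArm64::lXX.
// inst0..inst2 are the raw PowerPC words at compilerPC, +4, and +8.
bool IsIdleLoop(uint32_t inst0, uint32_t inst1, uint32_t inst2, bool is_wii)
{
  // lwz r0, simm(r13): opcode 32, rD = r0, rA = r13, any displacement.
  const bool loads_from_r13 = (inst0 & 0xFFFF0000) == 0x800D0000;
  // cmplwi r0, 0 on GameCube titles, or cmpwi r0, 0 on Wii.
  const bool compares_zero = inst1 == 0x28000000 ||
                             (is_wii && inst1 == 0x2C000000);
  // beq -8: loop back to the load while the polled value is still zero.
  const bool loops_back = inst2 == 0x4182fff8;
  return loads_from_r13 && compares_zero && loops_back;
}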
+
+void JitArm64::stX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreOff);
+
+	u32 a = inst.RA, b = inst.RB, s = inst.RS;
+	s32 offset = inst.SIMM_16;
+	s32 regOffset = -1;
+	u32 flags = BackPatchInfo::FLAG_STORE;
+	bool update = false;
+	switch (inst.OPCD)
+	{
+	case 31:
+		switch (inst.SUBOP10)
+		{
+		case 183: // stwux
+			update = true;
+		case 151: // stwx
+			flags |= BackPatchInfo::FLAG_SIZE_32;
+			regOffset = b;
+			break;
+		case 247: // stbux
+			update = true;
+		case 215: // stbx
+			flags |= BackPatchInfo::FLAG_SIZE_8;
+			regOffset = b;
+			break;
+		case 439: // sthux
+			update = true;
+		case 407: // sthx
+			flags |= BackPatchInfo::FLAG_SIZE_16;
+			regOffset = b;
+			break;
+		}
+		break;
+	case 37: // stwu
+		update = true;
+	case 36: // stw
+		flags |= BackPatchInfo::FLAG_SIZE_32;
+		break;
+	case 39: // stbu
+		update = true;
+	case 38: // stb
+		flags |= BackPatchInfo::FLAG_SIZE_8;
+		break;
+	case 45: // sthu
+		update = true;
+	case 44: // sth
+		flags |= BackPatchInfo::FLAG_SIZE_16;
+		break;
+	}
+
+	SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset);
+
+	if (update)
+	{
+		ARM64Reg WA = gpr.GetReg();
+		ARM64Reg RB;
+		ARM64Reg RA = gpr.R(a);
+		if (regOffset != -1)
+			RB = gpr.R(regOffset);
+		if (regOffset == -1)
+		{
+			MOVI2R(WA, offset);
+			ADD(RA, RA, WA);
+		}
+		else
+		{
+			ADD(RA, RA, RB);
+		}
+		gpr.Unlock(WA);
+	}
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 336bf47640..6e27274511 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -65,21 +65,21 @@ static GekkoOPTemplate primarytable[] =
 	{28, &JitArm64::arith_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
 	{29, &JitArm64::arith_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
 
-	{32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{32, &JitArm64::lXX}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{33, &JitArm64::lXX}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{34, &JitArm64::lXX}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{35, &JitArm64::lXX}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{40, &JitArm64::lXX}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{41, &JitArm64::lXX}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{42, &JitArm64::lXX}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{43, &JitArm64::lXX}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
 
-	{44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
-	{45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
-	{36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
-	{37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
-	{38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
-	{39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
+	{44, &JitArm64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
+	{45, &JitArm64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
+	{36, &JitArm64::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
+	{37, &JitArm64::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
+	{38, &JitArm64::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
+	{39, &JitArm64::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
 
 	{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
 	{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
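For the update-form stores, stX performs the rA writeback after the store itself, which matches the architectural definition of the u variants. A toy reference model of stwu's semantics (standard PowerPC behavior for context, not code from this diff):

#include <cstdint>
#include <map>

// Reference semantics of stwu rS, d(rA):
//   EA = rA + EXTS(d);  MEM(EA, 4) = rS;  rA = EA
// This is why stX re-adds the offset (or rB) into RA after SafeStoreFromReg.
void stwu(uint32_t gpr[32], std::map<uint32_t, uint32_t>& memory,
          int s, int a, int16_t d)
{
  const uint32_t ea = gpr[a] + static_cast<int32_t>(d);
  memory[ea] = gpr[s];  // 32-bit store of rS at the effective address
  gpr[a] = ea;          // update form: rA receives the effective address
}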