From 62e1d7ad99bc2f25c99cc838c345b35d7b7c4097 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 16 Jun 2024 17:24:58 +0200 Subject: [PATCH 01/10] JitArm64: Add ScopedARM64Reg --- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 019edaa726..159cfb6836 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -177,6 +177,59 @@ public: // Requires unlocking after done Arm64Gen::ARM64Reg GetReg(); + class ScopedARM64Reg + { + public: + inline ScopedARM64Reg() = default; + ScopedARM64Reg(const ScopedARM64Reg&) = delete; + explicit inline ScopedARM64Reg(Arm64RegCache& cache) : m_reg(cache.GetReg()), m_gpr(&cache) {} + inline ScopedARM64Reg(Arm64Gen::ARM64Reg reg) : m_reg(reg) {} + inline ScopedARM64Reg(ScopedARM64Reg&& scoped_reg) { *this = std::move(scoped_reg); } + inline ~ScopedARM64Reg() { Unlock(); } + + inline ScopedARM64Reg& operator=(const ScopedARM64Reg&) = delete; + inline ScopedARM64Reg& operator=(Arm64Gen::ARM64Reg reg) + { + Unlock(); + m_reg = reg; + return *this; + } + inline ScopedARM64Reg& operator=(ScopedARM64Reg&& scoped_reg) + { + // Taking ownership of an existing scoped register, no need to release. + m_reg = scoped_reg.m_reg; + m_gpr = scoped_reg.m_gpr; + scoped_reg.Invalidate(); + return *this; + } + + inline Arm64Gen::ARM64Reg GetReg() const { return m_reg; } + inline operator Arm64Gen::ARM64Reg() const { return GetReg(); } + inline void Unlock() + { + // Only unlock the register if GPR is set. + if (m_gpr != nullptr) + { + m_gpr->Unlock(m_reg); + } + Invalidate(); + } + + private: + inline void Invalidate() + { + m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_gpr = nullptr; + } + + Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + Arm64RegCache* m_gpr = nullptr; + }; + + // Returns a temporary register + // Unlocking is implicitly handled through RAII + inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this); } + void UpdateLastUsed(BitSet32 regs_used); // Get available host registers From c0a0746d65172503d7df9fb51099ca332f6f5bc5 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 16 Jun 2024 17:25:40 +0200 Subject: [PATCH 02/10] JitArm64_Integer: Use ScopedARM64Reg --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 223 ++++++++---------- 1 file changed, 103 insertions(+), 120 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 63ccff4c47..8b00447ffd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -86,10 +86,9 @@ void JitArm64::LoadCarry() { case CarryFlag::InPPCState: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); CMP(WA, 1); - gpr.Unlock(WA); break; } case CarryFlag::InHostCarry: @@ -119,18 +118,16 @@ void JitArm64::FlushCarry() } case CarryFlag::InHostCarry: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); CSET(WA, CC_CS); STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(WA); break; } case CarryFlag::ConstantTrue: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); MOVI2R(WA, 1); STRB(IndexType::Unsigned, WA, 
PPC_REG, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(WA); break; } case CarryFlag::ConstantFalse: @@ -155,9 +152,10 @@ void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), else { gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); - (this->*op)(gpr.R(d), gpr.R(a), value, WA); - gpr.Unlock(WA); + { + auto WA = gpr.GetScopedReg(); + (this->*op)(gpr.R(d), gpr.R(a), value, WA); + } if (Rc) ComputeRC0(gpr.R(d)); @@ -245,9 +243,8 @@ void JitArm64::addix(UGeckoInstruction inst) { gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ADDI2R(gpr.R(d), gpr.R(a), imm, WA); - gpr.Unlock(WA); } } else @@ -544,9 +541,10 @@ void JitArm64::addx(UGeckoInstruction inst) int imm_value = gpr.GetImm(imm_reg); gpr.BindToRegister(d, d == in_reg); - ARM64Reg WA = gpr.GetReg(); - ADDI2R(gpr.R(d), gpr.R(in_reg), imm_value, WA); - gpr.Unlock(WA); + { + auto WA = gpr.GetScopedReg(); + ADDI2R(gpr.R(d), gpr.R(in_reg), imm_value, WA); + } if (inst.Rc) ComputeRC0(gpr.R(d)); } @@ -722,9 +720,8 @@ void JitArm64::cmpi(UGeckoInstruction inst) if (B != 0) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); SUBI2R(CR, CR, B, EncodeRegTo64(WA)); - gpr.Unlock(WA); } } @@ -796,10 +793,9 @@ void JitArm64::rlwinmx_internal(UGeckoInstruction inst, u32 sh) } else { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); MOVI2R(WA, mask); AND(gpr.R(a), WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, 32 - sh)); - gpr.Unlock(WA); } if (inst.Rc) @@ -829,11 +825,12 @@ void JitArm64::rlwnmx(UGeckoInstruction inst) const u32 mask = MakeRotationMask(inst.MB, inst.ME); gpr.BindToRegister(a, a == s || a == b); - ARM64Reg WA = gpr.GetReg(); - NEG(WA, gpr.R(b)); - RORV(gpr.R(a), gpr.R(s), WA); - ANDI2R(gpr.R(a), gpr.R(a), mask, WA); - gpr.Unlock(WA); + { + auto WA = gpr.GetScopedReg(); + NEG(WA, gpr.R(b)); + RORV(gpr.R(a), gpr.R(s), WA); + ANDI2R(gpr.R(a), gpr.R(a), mask, WA); + } if (inst.Rc) ComputeRC0(gpr.R(a)); @@ -878,8 +875,8 @@ void JitArm64::srawix(UGeckoInstruction inst) if (js.op->wantsCA) { - ARM64Reg WA = gpr.GetReg(); - ARM64Reg dest = inplace_carry ? WA : ARM64Reg::WSP; + auto WA = gpr.GetScopedReg(); + ARM64Reg dest = inplace_carry ? ARM64Reg(WA) : ARM64Reg::WSP; if (a != s) { ASR(RA, RS, amount); @@ -901,7 +898,6 @@ void JitArm64::srawix(UGeckoInstruction inst) CSINC(WA, ARM64Reg::WSP, ARM64Reg::WSP, CC_EQ); ComputeCarry(WA); } - gpr.Unlock(WA); } else { @@ -936,9 +932,10 @@ void JitArm64::addic(UGeckoInstruction inst) else { gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); - CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); - gpr.Unlock(WA); + { + auto WA = gpr.GetScopedReg(); + CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); + } ComputeCarry(); if (rc) @@ -1037,12 +1034,10 @@ void JitArm64::mulli(UGeckoInstruction inst) gpr.BindToRegister(d, allocate_reg); // Reuse d to hold the immediate if possible, allocate a register otherwise. - ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(d); + auto WA = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d)); MOVI2R(WA, (u32)(s32)inst.SIMM_16); MUL(gpr.R(d), gpr.R(a), WA); - if (allocate_reg) - gpr.Unlock(WA); } } @@ -1137,16 +1132,16 @@ void JitArm64::addzex(UGeckoInstruction inst) { case CarryFlag::InPPCState: { - gpr.BindToRegister(d, d == a); - ARM64Reg WA = d == a ? gpr.GetReg() : gpr.R(d); + const bool allocate_reg = d == a; + gpr.BindToRegister(d, allocate_reg); + + { + auto WA = allocate_reg ? 
gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d)); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA); + } - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA); ComputeCarry(); - - if (d == a) - gpr.Unlock(WA); - break; } case CarryFlag::InHostCarry: @@ -1229,18 +1224,16 @@ void JitArm64::subfex(UGeckoInstruction inst) { case CarryFlag::InPPCState: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d)); - gpr.Unlock(WA); break; } case CarryFlag::InHostCarry: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); MOVI2R(WA, ~i + j); ADC(gpr.R(d), WA, ARM64Reg::WZR); - gpr.Unlock(WA); break; } case CarryFlag::ConstantTrue: @@ -1274,23 +1267,30 @@ void JitArm64::subfex(UGeckoInstruction inst) else { gpr.BindToRegister(d, d == a || d == b); - ARM64Reg RB = mex ? gpr.GetReg() : gpr.R(b); - if (mex) - MOVI2R(RB, -1); + { + Arm64GPRCache::ScopedARM64Reg RB; + if (mex) + { + RB = gpr.GetScopedReg(); + MOVI2R(RB, -1); + } + else + { + RB = gpr.R(b); + } - if (js.carryFlag == CarryFlag::ConstantTrue) - { - CARRY_IF_NEEDED(SUB, SUBS, gpr.R(d), RB, gpr.R(a)); - } - else - { - LoadCarry(); - CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), RB, gpr.R(a)); + if (js.carryFlag == CarryFlag::ConstantTrue) + { + CARRY_IF_NEEDED(SUB, SUBS, gpr.R(d), RB, gpr.R(a)); + } + else + { + LoadCarry(); + CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), RB, gpr.R(a)); + } } ComputeCarry(); - if (mex) - gpr.Unlock(RB); } if (inst.Rc) @@ -1343,12 +1343,13 @@ void JitArm64::subfzex(UGeckoInstruction inst) { case CarryFlag::InPPCState: { - ARM64Reg WA = gpr.GetReg(); - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - MVN(gpr.R(d), gpr.R(a)); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA); + { + auto WA = gpr.GetScopedReg(); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + MVN(gpr.R(d), gpr.R(a)); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA); + } ComputeCarry(); - gpr.Unlock(WA); break; } case CarryFlag::InHostCarry: @@ -1394,21 +1395,20 @@ void JitArm64::subfic(UGeckoInstruction inst) { const bool will_read = d == a; const bool is_zero = imm == 0; - const bool allocate_reg = will_read && !is_zero; gpr.BindToRegister(d, will_read); // d = imm - a ARM64Reg RD = gpr.R(d); - ARM64Reg WA = ARM64Reg::WZR; - if (!is_zero) { - WA = will_read ? gpr.GetReg() : RD; - MOVI2R(WA, imm); - } - CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); + Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); + if (!is_zero) + { + WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); + MOVI2R(WA, imm); + } - if (allocate_reg) - gpr.Unlock(WA); + CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); + } ComputeCarry(); } @@ -1433,10 +1433,9 @@ void JitArm64::addex(UGeckoInstruction inst) { case CarryFlag::InPPCState: { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ADDI2R(gpr.R(d), WA, i + j, gpr.R(d)); - gpr.Unlock(WA); break; } case CarryFlag::InHostCarry: @@ -1477,23 +1476,30 @@ void JitArm64::addex(UGeckoInstruction inst) else { gpr.BindToRegister(d, d == a || d == b); - ARM64Reg RB = mex ? 
gpr.GetReg() : gpr.R(b); - if (mex) - MOVI2R(RB, -1); + { + Arm64GPRCache::ScopedARM64Reg RB; + if (mex) + { + RB = gpr.GetScopedReg(); + MOVI2R(RB, -1); + } + else + { + RB = gpr.R(b); + } - if (js.carryFlag == CarryFlag::ConstantFalse) - { - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), RB); - } - else - { - LoadCarry(); - CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), RB); + if (js.carryFlag == CarryFlag::ConstantFalse) + { + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), RB); + } + else + { + LoadCarry(); + CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), RB); + } } ComputeCarry(); - if (mex) - gpr.Unlock(RB); } if (inst.Rc) @@ -1575,7 +1581,7 @@ void JitArm64::divwux(UGeckoInstruction inst) { UnsignedMagic m = UnsignedDivisionConstants(divisor); - ARM64Reg WI = allocate_reg ? gpr.GetReg() : RD; + auto WI = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); ARM64Reg XD = EncodeRegTo64(RD); MOVI2R(WI, m.multiplier); @@ -1590,9 +1596,6 @@ void JitArm64::divwux(UGeckoInstruction inst) } LSR(XD, XD, 32 + m.shift); - - if (allocate_reg) - gpr.Unlock(WI); } if (inst.Rc) @@ -1719,7 +1722,7 @@ void JitArm64::divwx(UGeckoInstruction inst) ARM64Reg RA = gpr.R(a); ARM64Reg RD = gpr.R(d); - ARM64Reg WA = allocate_reg ? gpr.GetReg() : RD; + auto WA = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); TST(RA, RA); ADDI2R(WA, RA, abs_val - 1, WA); @@ -1729,9 +1732,6 @@ void JitArm64::divwx(UGeckoInstruction inst) NEG(RD, WA, ArithOption(WA, ShiftType::ASR, MathUtil::IntLog2(abs_val))); else ASR(RD, WA, MathUtil::IntLog2(abs_val)); - - if (allocate_reg) - gpr.Unlock(WA); } else { @@ -1739,8 +1739,8 @@ void JitArm64::divwx(UGeckoInstruction inst) SignedMagic m = SignedDivisionConstants(divisor); ARM64Reg RD = gpr.R(d); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = allocate_reg ? gpr.GetReg() : RD; + auto WA = gpr.GetScopedReg(); + auto WB = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); ARM64Reg XD = EncodeRegTo64(RD); ARM64Reg XA = EncodeRegTo64(WA); @@ -1776,10 +1776,6 @@ void JitArm64::divwx(UGeckoInstruction inst) ASR(XD, XD, 32 + m.shift); ADD(RD, WA, RD); } - - gpr.Unlock(WA); - if (allocate_reg) - gpr.Unlock(WB); } if (inst.Rc) @@ -1982,8 +1978,7 @@ void JitArm64::srawx(UGeckoInstruction inst) else { gpr.BindToRegister(a, a == s); - - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); if (a != s) { @@ -2009,8 +2004,6 @@ void JitArm64::srawx(UGeckoInstruction inst) CSET(WA, CC_NEQ); ComputeCarry(WA); - - gpr.Unlock(WA); } } else @@ -2018,8 +2011,8 @@ void JitArm64::srawx(UGeckoInstruction inst) const bool will_read = a == b || a == s; gpr.BindToRegister(a, will_read); - const bool allocate_reg = will_read || js.op->wantsCA; - ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(a); + auto WA = + will_read || js.op->wantsCA ? 
gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(a)); LSL(EncodeRegTo64(WA), EncodeRegTo64(gpr.R(s)), 32); ASRV(EncodeRegTo64(WA), EncodeRegTo64(WA), EncodeRegTo64(gpr.R(b))); @@ -2031,9 +2024,6 @@ void JitArm64::srawx(UGeckoInstruction inst) CSET(WA, CC_NEQ); ComputeCarry(WA); } - - if (allocate_reg) - gpr.Unlock(WA); } if (inst.Rc) @@ -2088,10 +2078,9 @@ void JitArm64::rlwimix(UGeckoInstruction inst) // No rotation // No mask inversion gpr.BindToRegister(a, true); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); UBFX(WA, gpr.R(s), lsb, width); BFI(gpr.R(a), WA, lsb, width); - gpr.Unlock(WA); } else if (inst.SH && inst.MB <= inst.ME) { @@ -2103,28 +2092,22 @@ void JitArm64::rlwimix(UGeckoInstruction inst) } else { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ROR(WA, gpr.R(s), (rot_dist + lsb) % 32); BFI(gpr.R(a), WA, lsb, width); - gpr.Unlock(WA); } } else { gpr.BindToRegister(a, true); - const bool allocate_reg = a == s; ARM64Reg RA = gpr.R(a); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = allocate_reg ? gpr.GetReg() : RA; + auto WA = gpr.GetScopedReg(); + auto WB = a == s ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RA); MOVI2R(WA, mask); BIC(WB, RA, WA); AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, rot_dist)); ORR(RA, WB, WA); - - gpr.Unlock(WA); - if (allocate_reg) - gpr.Unlock(WB); } if (inst.Rc) From cb29a29866ba9304ca0139ff5e6216b8ea34653c Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:17:25 +0200 Subject: [PATCH 03/10] JitArm64: Use ScopedARM64Reg --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 129 +++++++++++----------- 1 file changed, 62 insertions(+), 67 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index e46ccf9ee4..3caadf9918 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -222,12 +222,11 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) if (js.op->canEndBlock) { // also flush the program counter - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); MOVI2R(WA, js.compilerPC); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pc)); ADD(WA, WA, 4); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(npc)); - gpr.Unlock(WA); } Interpreter::Instruction instr = Interpreter::GetInterpreterOp(inst); @@ -243,24 +242,23 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) { if (js.isLastInstruction) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(npc)); WriteExceptionExit(WA); - gpr.Unlock(WA); } else { // only exit if ppcstate.npc was changed - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(npc)); - ARM64Reg WB = gpr.GetReg(); - MOVI2R(WB, js.compilerPC + 4); - CMP(WB, WA); - gpr.Unlock(WB); + { + auto WB = gpr.GetScopedReg(); + MOVI2R(WB, js.compilerPC + 4); + CMP(WB, WA); + } FixupBranch c = B(CC_EQ); WriteExceptionExit(WA); SetJumpTarget(c); - gpr.Unlock(WA); } } else if (ShouldHandleFPExceptionForInstruction(js.op)) @@ -359,11 +357,12 @@ void JitArm64::IntializeSpeculativeConstants() SwitchToNearCode(); } - ARM64Reg tmp = gpr.GetReg(); - ARM64Reg value = gpr.R(i); - MOVI2R(tmp, compile_time_value); - CMP(value, tmp); - gpr.Unlock(tmp); + { + auto tmp = gpr.GetScopedReg(); + ARM64Reg value = gpr.R(i); + MOVI2R(tmp, compile_time_value); + CMP(value, tmp); + } FixupBranch 
no_fail = B(CCFlags::CC_EQ); B(fail); @@ -402,16 +401,15 @@ void JitArm64::MSRUpdated(u32 msr) } else { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); MOVI2R(WA, feature_flags); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); - gpr.Unlock(WA); } } void JitArm64::MSRUpdated(ARM64Reg msr) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); // Update mem_ptr @@ -432,8 +430,6 @@ void JitArm64::MSRUpdated(ARM64Reg msr) if (other_feature_flags != 0) ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); - - gpr.Unlock(WA); } void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return, @@ -631,35 +627,37 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a // function has been called! gpr.Lock(ARM64Reg::W30); } - // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack - ARM64Reg after_reg = ARM64Reg::INVALID_REG; - ARM64Reg reg_to_push; - const u64 feature_flags = m_ppc_state.feature_flags; - if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) + + const u8* host_address_after_return; { - after_reg = gpr.GetReg(); - reg_to_push = EncodeRegTo64(after_reg); - MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return); + // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack + Arm64RegCache::ScopedARM64Reg after_reg; + ARM64Reg reg_to_push; + const u64 feature_flags = m_ppc_state.feature_flags; + if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) + { + after_reg = gpr.GetScopedReg(); + reg_to_push = EncodeRegTo64(after_reg); + MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return); + } + else if (feature_flags == 0) + { + reg_to_push = EncodeRegTo64(exit_address_after_return_reg); + } + else + { + after_reg = gpr.GetScopedReg(); + reg_to_push = EncodeRegTo64(after_reg); + ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32, + reg_to_push); + } + + auto code_reg = gpr.GetScopedReg(); + constexpr s32 adr_offset = sizeof(u32) * 3; + host_address_after_return = GetCodePtr() + adr_offset; + ADR(EncodeRegTo64(code_reg), adr_offset); + STP(IndexType::Pre, EncodeRegTo64(code_reg), reg_to_push, ARM64Reg::SP, -16); } - else if (feature_flags == 0) - { - reg_to_push = EncodeRegTo64(exit_address_after_return_reg); - } - else - { - after_reg = gpr.GetReg(); - reg_to_push = EncodeRegTo64(after_reg); - ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32, - reg_to_push); - } - ARM64Reg code_reg = gpr.GetReg(); - constexpr s32 adr_offset = sizeof(u32) * 3; - const u8* host_address_after_return = GetCodePtr() + adr_offset; - ADR(EncodeRegTo64(code_reg), adr_offset); - STP(IndexType::Pre, EncodeRegTo64(code_reg), reg_to_push, ARM64Reg::SP, -16); - gpr.Unlock(code_reg); - if (after_reg != ARM64Reg::INVALID_REG) - gpr.Unlock(after_reg); FixupBranch skip_exit = BL(); DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed()); @@ -792,10 +790,9 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always void JitArm64::WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); WriteConditionalExceptionExit(exception, WA, Arm64Gen::ARM64Reg::INVALID_REG, increment_sp_on_exit); - gpr.Unlock(WA); } void JitArm64::WriteConditionalExceptionExit(int 
exception, ARM64Reg temp_gpr, ARM64Reg temp_fpr, @@ -1183,7 +1180,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) // asynchronous. if (jo.optimizeGatherPipe && gatherPipeIntCheck) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); @@ -1209,8 +1206,6 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) SwitchToNearCode(); SetJumpTarget(no_ext_exception); SetJumpTarget(exit); - - gpr.Unlock(WA); } } @@ -1224,12 +1219,11 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) // The only thing that currently sets op.skip is the BLR following optimization. // If any non-branch instruction starts setting that too, this will need to be changed. ASSERT(op.inst.hex == 0x4e800020); - const ARM64Reg bw_reg_a = gpr.GetReg(), bw_reg_b = gpr.GetReg(); + const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg(); const BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)}; WriteBranchWatch(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b, gpr_caller_save, fpr.GetCallerSavedUsed()); - gpr.Unlock(bw_reg_a, bw_reg_b); } } else @@ -1267,23 +1261,24 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound) { + FixupBranch b1; // This instruction uses FPU - needs to add FP exception bailout - ARM64Reg WA = gpr.GetReg(); - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); - FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit + { + auto WA = gpr.GetScopedReg(); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); + b1 = TBNZ(WA, 13); // Test FP enabled bit - FixupBranch far_addr = B(); - SwitchToFarCode(); - SetJumpTarget(far_addr); + FixupBranch far_addr = B(); + SwitchToFarCode(); + SetJumpTarget(far_addr); - gpr.Flush(FlushMode::MaintainState, WA); - fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); + gpr.Flush(FlushMode::MaintainState, WA); + fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32)); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - - gpr.Unlock(WA); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32)); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + } WriteExceptionExit(js.compilerPC, false, true); From 9805a8ac0a6278b82b07f627f746208b966bae6d Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:17:44 +0200 Subject: [PATCH 04/10] JitArm64_Branch: Use ScopedARM64Reg --- .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 356 +++++++++--------- 1 file changed, 171 insertions(+), 185 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index 01cd813f2d..a9df905b10 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -24,13 +24,12 @@ void JitArm64::sc(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - ARM64Reg WA = gpr.GetReg(); - - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - ORR(WA, WA, LogicalImm(EXCEPTION_SYSCALL, 
GPRSize::B32)); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - - gpr.Unlock(WA); + { + auto WA = gpr.GetScopedReg(); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + ORR(WA, WA, LogicalImm(EXCEPTION_SYSCALL, GPRSize::B32)); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + } WriteExceptionExit(js.compilerPC + 4, false, true); } @@ -51,28 +50,28 @@ void JitArm64::rfi(UGeckoInstruction inst) // R1 = MSR contents // R2 = Mask // R3 = Mask - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = gpr.GetReg(); - ARM64Reg WC = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); + { + auto WB = gpr.GetScopedReg(); + auto WC = gpr.GetScopedReg(); - LDR(IndexType::Unsigned, WC, PPC_REG, PPCSTATE_OFF(msr)); + LDR(IndexType::Unsigned, WC, PPC_REG, PPCSTATE_OFF(msr)); - ANDI2R(WC, WC, (~mask) & clearMSR13, WA); // rD = Masked MSR + ANDI2R(WC, WC, (~mask) & clearMSR13, WA); // rD = Masked MSR - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR1)); // rB contains SRR1 here + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR1)); // rB contains SRR1 here - ANDI2R(WA, WA, mask & clearMSR13, WB); // rB contains masked SRR1 here - ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1 + ANDI2R(WA, WA, mask & clearMSR13, WB); // rB contains masked SRR1 here + ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1 - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA - gpr.Unlock(WB, WC); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA + } MSRUpdated(WA); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0)); WriteExceptionExit(WA); - gpr.Unlock(WA); } template @@ -144,10 +143,10 @@ void JitArm64::bx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff); - ARM64Reg WA = ARM64Reg::INVALID_REG; + Arm64GPRCache::ScopedARM64Reg WA = ARM64Reg::INVALID_REG; if (inst.LK) { - WA = gpr.GetReg(); + WA = gpr.GetScopedReg(); MOVI2R(WA, js.compilerPC + 4); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); } @@ -156,13 +155,12 @@ void JitArm64::bx(UGeckoInstruction inst) { if (IsDebuggingEnabled()) { - const ARM64Reg WB = gpr.GetReg(), WC = gpr.GetReg(); + const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg(); BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)}; if (WA != ARM64Reg::INVALID_REG && js.op->skipLRStack) gpr_caller_save[DecodeReg(WA)] = false; WriteBranchWatch(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save, fpr.GetCallerSavedUsed()); - gpr.Unlock(WB, WC); } if (inst.LK && !js.op->skipLRStack) { @@ -172,9 +170,6 @@ void JitArm64::bx(UGeckoInstruction inst) FakeLKExit(js.compilerPC + 4, WA); } - if (WA != ARM64Reg::INVALID_REG) - gpr.Unlock(WA); - return; } @@ -184,13 +179,12 @@ void JitArm64::bx(UGeckoInstruction inst) if (js.op->branchIsIdleLoop) { if (WA == ARM64Reg::INVALID_REG) - WA = gpr.GetReg(); + WA = gpr.GetScopedReg(); if (IsDebuggingEnabled()) { - const ARM64Reg WB = gpr.GetReg(); + const auto WB = gpr.GetScopedReg(); WriteBranchWatch(js.compilerPC, js.op->branchTo, inst, WA, WB, {}, {}); - gpr.Unlock(WB); } // make idle loops go faster @@ -198,7 +192,7 @@ void JitArm64::bx(UGeckoInstruction inst) MOVP2R(XA, &CoreTiming::GlobalIdle); BLR(XA); - gpr.Unlock(WA); + WA.Unlock(); WriteExceptionExit(js.op->branchTo); return; @@ -206,16 +200,12 @@ void JitArm64::bx(UGeckoInstruction inst) if (IsDebuggingEnabled()) { - const ARM64Reg WB = gpr.GetReg(), WC = gpr.GetReg(); + const auto WB = 
gpr.GetScopedReg(), WC = gpr.GetScopedReg(); const BitSet32 gpr_caller_save = WA != ARM64Reg::INVALID_REG ? BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS : BitSet32{}; WriteBranchWatch(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save, {}); - gpr.Unlock(WB, WC); } WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA); - - if (WA != ARM64Reg::INVALID_REG) - gpr.Unlock(WA); } void JitArm64::bcx(UGeckoInstruction inst) @@ -223,77 +213,79 @@ void JitArm64::bcx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = inst.LK || IsDebuggingEnabled() ? gpr.GetReg() : WA; - ARM64Reg WC = IsDebuggingEnabled() && inst.LK && !js.op->branchIsIdleLoop ? gpr.GetReg() : - ARM64Reg::INVALID_REG; + auto WA = gpr.GetScopedReg(); + auto WB = inst.LK || IsDebuggingEnabled() ? gpr.GetScopedReg() : + Arm64GPRCache::ScopedARM64Reg(WA.GetReg()); - FixupBranch pCTRDontBranch; - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); - SUBS(WA, WA, 1); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); + auto WC = IsDebuggingEnabled() && inst.LK && !js.op->branchIsIdleLoop ? + gpr.GetScopedReg() : + Arm64GPRCache::ScopedARM64Reg(ARM64Reg::INVALID_REG); - if (inst.BO & BO_BRANCH_IF_CTR_0) - pCTRDontBranch = B(CC_NEQ); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); + SUBS(WA, WA, 1); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = + JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + if (inst.LK) + { + MOVI2R(WA, js.compilerPC + 4); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); + } + + gpr.Flush(FlushMode::MaintainState, WB); + fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); + + if (IsDebuggingEnabled()) + { + ARM64Reg bw_reg_a, bw_reg_b; + // WC is only allocated when WA is needed for WriteExit and cannot be clobbered. 
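+      // (Illustrative example: a plain "bc" (LK = 0) lets branch watch scratch
+      //  WA/WB, since WriteExit no longer needs WA afterwards; a "bcl" to a
+      //  non-idle target must keep WA live for WriteExit, so WC is allocated
+      //  and the WB/WC pair is used instead.)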
+ if (WC == ARM64Reg::INVALID_REG) + bw_reg_a = WA, bw_reg_b = WB; + else + bw_reg_a = WB, bw_reg_b = WC; + const BitSet32 gpr_caller_save = + gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)}; + WriteBranchWatch(js.compilerPC, js.op->branchTo, inst, bw_reg_a, bw_reg_b, + gpr_caller_save, fpr.GetCallerSavedUsed()); + } + if (js.op->branchIsIdleLoop) + { + // make idle loops go faster + ARM64Reg XA = EncodeRegTo64(WA); + + MOVP2R(XA, &CoreTiming::GlobalIdle); + BLR(XA); + + WriteExceptionExit(js.op->branchTo); + } else - pCTRDontBranch = B(CC_EQ); + { + WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA); + } + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget(pConditionDontBranch); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget(pCTRDontBranch); } - FixupBranch pConditionDontBranch; - - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit - { - pConditionDontBranch = - JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - } - - if (inst.LK) - { - MOVI2R(WA, js.compilerPC + 4); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); - } - - gpr.Flush(FlushMode::MaintainState, WB); - fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); - - if (IsDebuggingEnabled()) - { - ARM64Reg bw_reg_a, bw_reg_b; - // WC is only allocated when WA is needed for WriteExit and cannot be clobbered. - if (WC == ARM64Reg::INVALID_REG) - bw_reg_a = WA, bw_reg_b = WB; - else - bw_reg_a = WB, bw_reg_b = WC; - const BitSet32 gpr_caller_save = - gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)}; - WriteBranchWatch(js.compilerPC, js.op->branchTo, inst, bw_reg_a, bw_reg_b, - gpr_caller_save, fpr.GetCallerSavedUsed()); - } - if (js.op->branchIsIdleLoop) - { - // make idle loops go faster - ARM64Reg XA = EncodeRegTo64(WA); - - MOVP2R(XA, &CoreTiming::GlobalIdle); - BLR(XA); - - WriteExceptionExit(js.op->branchTo); - } - else - { - WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA); - } - - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) - SetJumpTarget(pConditionDontBranch); - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) - SetJumpTarget(pCTRDontBranch); - - if (WC != ARM64Reg::INVALID_REG) - gpr.Unlock(WC); - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { gpr.Flush(FlushMode::All, WA); @@ -311,10 +303,6 @@ void JitArm64::bcx(UGeckoInstruction inst) WriteBranchWatch(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save, fpr.GetCallerSavedUsed()); } - - gpr.Unlock(WA); - if (WB != WA) - gpr.Unlock(WB); } void JitArm64::bcctrx(UGeckoInstruction inst) @@ -337,34 +325,29 @@ void JitArm64::bcctrx(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - ARM64Reg WB = ARM64Reg::INVALID_REG; + Arm64GPRCache::ScopedARM64Reg WB = ARM64Reg::INVALID_REG; if (inst.LK_3) { - WB = gpr.GetReg(); + WB = gpr.GetScopedReg(); MOVI2R(WB, js.compilerPC + 4); STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); } - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); AND(WA, WA, LogicalImm(~0x3, GPRSize::B32)); if (IsDebuggingEnabled()) { - const ARM64Reg WC = gpr.GetReg(), WD = gpr.GetReg(); + const auto WC = gpr.GetScopedReg(), WD = gpr.GetScopedReg(); BitSet32 gpr_caller_save = BitSet32{DecodeReg(WA)}; if (WB != ARM64Reg::INVALID_REG) gpr_caller_save[DecodeReg(WB)] = true; gpr_caller_save 
&= CALLER_SAVED_GPRS; WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WC, WD, gpr_caller_save, {}); - gpr.Unlock(WC, WD); } WriteExit(WA, inst.LK_3, js.compilerPC + 4, WB); - - if (WB != ARM64Reg::INVALID_REG) - gpr.Unlock(WB); - gpr.Unlock(WA); } void JitArm64::bclrx(UGeckoInstruction inst) @@ -375,85 +358,92 @@ void JitArm64::bclrx(UGeckoInstruction inst) bool conditional = (inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0; - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = - conditional || inst.LK || IsDebuggingEnabled() ? gpr.GetReg() : ARM64Reg::INVALID_REG; - ARM64Reg WC = IsDebuggingEnabled() ? gpr.GetReg() : ARM64Reg::INVALID_REG; - - FixupBranch pCTRDontBranch; - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + auto WA = gpr.GetScopedReg(); + Arm64GPRCache::ScopedARM64Reg WB; + if (conditional || inst.LK || IsDebuggingEnabled()) { - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); - SUBS(WA, WA, 1); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); - - if (inst.BO & BO_BRANCH_IF_CTR_0) - pCTRDontBranch = B(CC_NEQ); - else - pCTRDontBranch = B(CC_EQ); + WB = gpr.GetScopedReg(); } - FixupBranch pConditionDontBranch; - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - pConditionDontBranch = - JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - } - - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); - AND(WA, WA, LogicalImm(~0x3, GPRSize::B32)); - - if (inst.LK) - { - MOVI2R(WB, js.compilerPC + 4); - STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); - } - - gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB); - fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG); - - if (IsDebuggingEnabled()) - { - BitSet32 gpr_caller_save; - BitSet32 fpr_caller_save; - if (conditional) + Arm64GPRCache::ScopedARM64Reg WC; + if (IsDebuggingEnabled()) { - gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)}; - if (js.op->branchIsIdleLoop) - gpr_caller_save[DecodeReg(WA)] = false; - fpr_caller_save = fpr.GetCallerSavedUsed(); + WC = gpr.GetScopedReg(); + } + + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); + SUBS(WA, WA, 1); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = + JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); + AND(WA, WA, LogicalImm(~0x3, GPRSize::B32)); + + if (inst.LK) + { + MOVI2R(WB, js.compilerPC + 4); + STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); + } + + gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB); + fpr.Flush(conditional ? 
FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG); + + if (IsDebuggingEnabled()) + { + BitSet32 gpr_caller_save; + BitSet32 fpr_caller_save; + if (conditional) + { + gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)}; + if (js.op->branchIsIdleLoop) + gpr_caller_save[DecodeReg(WA)] = false; + fpr_caller_save = fpr.GetCallerSavedUsed(); + } + else + { + gpr_caller_save = + js.op->branchIsIdleLoop ? BitSet32{} : BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS; + fpr_caller_save = {}; + } + WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WB, WC, gpr_caller_save, + fpr_caller_save); + } + if (js.op->branchIsIdleLoop) + { + // make idle loops go faster + ARM64Reg XA = EncodeRegTo64(WA); + + MOVP2R(XA, &CoreTiming::GlobalIdle); + BLR(XA); + + WriteExceptionExit(js.op->branchTo); } else { - gpr_caller_save = - js.op->branchIsIdleLoop ? BitSet32{} : BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS; - fpr_caller_save = {}; + WriteBLRExit(WA); } - WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WB, WC, gpr_caller_save, - fpr_caller_save); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget(pConditionDontBranch); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget(pCTRDontBranch); } - if (js.op->branchIsIdleLoop) - { - // make idle loops go faster - ARM64Reg XA = EncodeRegTo64(WA); - - MOVP2R(XA, &CoreTiming::GlobalIdle); - BLR(XA); - - WriteExceptionExit(js.op->branchTo); - } - else - { - WriteBLRExit(WA); - } - - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) - SetJumpTarget(pConditionDontBranch); - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) - SetJumpTarget(pCTRDontBranch); - - if (WC != ARM64Reg::INVALID_REG) - gpr.Unlock(WC); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { @@ -472,8 +462,4 @@ void JitArm64::bclrx(UGeckoInstruction inst) WriteBranchWatch(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save, fpr.GetCallerSavedUsed()); } - - gpr.Unlock(WA); - if (WB != ARM64Reg::INVALID_REG) - gpr.Unlock(WB); } From ac3d3de66d462f126097326f4b7052f0140abf83 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:18:14 +0200 Subject: [PATCH 05/10] JitArm64_FloatingPoint: Use ScopedARM64Reg --- .../JitArm64/JitArm64_FloatingPoint.cpp | 327 +++++++++--------- 1 file changed, 158 insertions(+), 169 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 06e3974b9c..4f66b94702 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -102,154 +102,151 @@ void JitArm64::fp_arith(UGeckoInstruction inst) const ARM64Reg VC = use_c ? 
reg_encoder(fpr.R(c, type)) : ARM64Reg::INVALID_REG; const ARM64Reg VD = reg_encoder(fpr.RW(d, type_out)); - ARM64Reg V0Q = ARM64Reg::INVALID_REG; - ARM64Reg V1Q = ARM64Reg::INVALID_REG; - - ARM64Reg rounded_c_reg = VC; - if (round_c) { - ASSERT_MSG(DYNA_REC, !inputs_are_singles, "Tried to apply 25-bit precision to single"); + Arm64FPRCache::ScopedARM64Reg V0Q = ARM64Reg::INVALID_REG; + Arm64FPRCache::ScopedARM64Reg V1Q = ARM64Reg::INVALID_REG; - V0Q = fpr.GetReg(); - rounded_c_reg = reg_encoder(V0Q); - Force25BitPrecision(rounded_c_reg, VC); - } - - ARM64Reg inaccurate_fma_reg = VD; - if (fma && inaccurate_fma && VD == VB) - { - if (V0Q == ARM64Reg::INVALID_REG) - V0Q = fpr.GetReg(); - inaccurate_fma_reg = reg_encoder(V0Q); - } - - ARM64Reg result_reg = VD; - const bool preserve_d = - m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC)); - if (preserve_d) - { - V1Q = fpr.GetReg(); - result_reg = reg_encoder(V1Q); - } - - switch (op5) - { - case 18: - m_float_emit.FDIV(result_reg, VA, VB); - break; - case 20: - m_float_emit.FSUB(result_reg, VA, VB); - break; - case 21: - m_float_emit.FADD(result_reg, VA, VB); - break; - case 25: - m_float_emit.FMUL(result_reg, VA, rounded_c_reg); - break; - // While it may seem like PowerPC's nmadd/nmsub map to AArch64's nmadd/msub [sic], - // the subtly different definitions affect how signed zeroes are handled. - // Also, PowerPC's nmadd/nmsub perform rounding before the final negation. - // So, we negate using a separate FNEG instruction instead of using AArch64's nmadd/msub. - case 28: // fmsub: "D = A*C - B" vs "Vd = (-Va) + Vn*Vm" - case 30: // fnmsub: "D = -(A*C - B)" vs "Vd = -((-Va) + Vn*Vm)" - if (inaccurate_fma) + ARM64Reg rounded_c_reg = VC; + if (round_c) { - m_float_emit.FMUL(inaccurate_fma_reg, VA, rounded_c_reg); - m_float_emit.FSUB(result_reg, inaccurate_fma_reg, VB); - } - else - { - m_float_emit.FNMSUB(result_reg, VA, rounded_c_reg, VB); - } - break; - case 29: // fmadd: "D = A*C + B" vs "Vd = Va + Vn*Vm" - case 31: // fnmadd: "D = -(A*C + B)" vs "Vd = -(Va + Vn*Vm)" - if (inaccurate_fma) - { - m_float_emit.FMUL(inaccurate_fma_reg, VA, rounded_c_reg); - m_float_emit.FADD(result_reg, inaccurate_fma_reg, VB); - } - else - { - m_float_emit.FMADD(result_reg, VA, rounded_c_reg, VB); - } - break; - default: - ASSERT_MSG(DYNA_REC, 0, "fp_arith"); - break; - } + ASSERT_MSG(DYNA_REC, !inputs_are_singles, "Tried to apply 25-bit precision to single"); - Common::SmallVector nan_fixups; - if (m_accurate_nans) - { - // Check if we need to handle NaNs - m_float_emit.FCMP(result_reg); - FixupBranch no_nan = B(CCFlags::CC_VC); - FixupBranch nan = B(); - SetJumpTarget(no_nan); - - SwitchToFarCode(); - SetJumpTarget(nan); - - Common::SmallVector inputs; - inputs.push_back(VA); - if (use_b && VA != VB) - inputs.push_back(VB); - if (use_c && VA != VC && (!use_b || VB != VC)) - inputs.push_back(VC); - - // If any inputs are NaNs, pick the first NaN of them and set its quiet bit. - // However, we can skip checking the last input, because if exactly one input is NaN, AArch64 - // arithmetic instructions automatically pick that NaN and make it quiet, just like we want. 
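+      // (Illustrative example: in fsub, if only VB is NaN, the FSUB above
+      //  already produced VB quieted, so the loop below only needs to FCMP
+      //  and quieten the earlier inputs, i.e. just VA.)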
- for (size_t i = 0; i < inputs.size() - 1; ++i) - { - const ARM64Reg input = inputs[i]; - - m_float_emit.FCMP(input); - FixupBranch skip = B(CCFlags::CC_VC); - - // Make the NaN quiet - m_float_emit.FADD(VD, input, input); - - nan_fixups.push_back(B()); - - SetJumpTarget(skip); + V0Q = fpr.GetScopedReg(); + rounded_c_reg = reg_encoder(V0Q); + Force25BitPrecision(rounded_c_reg, VC); } - std::optional nan_early_fixup; + ARM64Reg inaccurate_fma_reg = VD; + if (fma && inaccurate_fma && VD == VB) + { + if (V0Q == ARM64Reg::INVALID_REG) + V0Q = fpr.GetScopedReg(); + inaccurate_fma_reg = reg_encoder(V0Q); + } + + ARM64Reg result_reg = VD; + const bool preserve_d = + m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC)); + if (preserve_d) + { + V1Q = fpr.GetScopedReg(); + result_reg = reg_encoder(V1Q); + } + + switch (op5) + { + case 18: + m_float_emit.FDIV(result_reg, VA, VB); + break; + case 20: + m_float_emit.FSUB(result_reg, VA, VB); + break; + case 21: + m_float_emit.FADD(result_reg, VA, VB); + break; + case 25: + m_float_emit.FMUL(result_reg, VA, rounded_c_reg); + break; + // While it may seem like PowerPC's nmadd/nmsub map to AArch64's nmadd/msub [sic], + // the subtly different definitions affect how signed zeroes are handled. + // Also, PowerPC's nmadd/nmsub perform rounding before the final negation. + // So, we negate using a separate FNEG instruction instead of using AArch64's nmadd/msub. + case 28: // fmsub: "D = A*C - B" vs "Vd = (-Va) + Vn*Vm" + case 30: // fnmsub: "D = -(A*C - B)" vs "Vd = -((-Va) + Vn*Vm)" + if (inaccurate_fma) + { + m_float_emit.FMUL(inaccurate_fma_reg, VA, rounded_c_reg); + m_float_emit.FSUB(result_reg, inaccurate_fma_reg, VB); + } + else + { + m_float_emit.FNMSUB(result_reg, VA, rounded_c_reg, VB); + } + break; + case 29: // fmadd: "D = A*C + B" vs "Vd = Va + Vn*Vm" + case 31: // fnmadd: "D = -(A*C + B)" vs "Vd = -(Va + Vn*Vm)" + if (inaccurate_fma) + { + m_float_emit.FMUL(inaccurate_fma_reg, VA, rounded_c_reg); + m_float_emit.FADD(result_reg, inaccurate_fma_reg, VB); + } + else + { + m_float_emit.FMADD(result_reg, VA, rounded_c_reg, VB); + } + break; + default: + ASSERT_MSG(DYNA_REC, 0, "fp_arith"); + break; + } + + Common::SmallVector nan_fixups; + if (m_accurate_nans) + { + // Check if we need to handle NaNs + m_float_emit.FCMP(result_reg); + FixupBranch no_nan = B(CCFlags::CC_VC); + FixupBranch nan = B(); + SetJumpTarget(no_nan); + + SwitchToFarCode(); + SetJumpTarget(nan); + + Common::SmallVector inputs; + inputs.push_back(VA); + if (use_b && VA != VB) + inputs.push_back(VB); + if (use_c && VA != VC && (!use_b || VB != VC)) + inputs.push_back(VC); + + // If any inputs are NaNs, pick the first NaN of them and set its quiet bit. + // However, we can skip checking the last input, because if exactly one input is NaN, AArch64 + // arithmetic instructions automatically pick that NaN and make it quiet, just like we want. + for (size_t i = 0; i < inputs.size() - 1; ++i) + { + const ARM64Reg input = inputs[i]; + + m_float_emit.FCMP(input); + FixupBranch skip = B(CCFlags::CC_VC); + + // Make the NaN quiet + m_float_emit.FADD(VD, input, input); + + nan_fixups.push_back(B()); + + SetJumpTarget(skip); + } + + std::optional nan_early_fixup; + if (negate_result) + { + // If we have a NaN, we must not execute FNEG. 
+ if (result_reg != VD) + m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); + nan_fixups.push_back(B()); + } + else + { + nan_early_fixup = B(); + } + + SwitchToNearCode(); + + if (nan_early_fixup) + SetJumpTarget(*nan_early_fixup); + } + + // PowerPC's nmadd/nmsub perform rounding before the final negation, which is not the case + // for any of AArch64's FMA instructions, so we negate using a separate instruction. if (negate_result) - { - // If we have a NaN, we must not execute FNEG. - if (result_reg != VD) - m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); - nan_fixups.push_back(B()); - } - else - { - nan_early_fixup = B(); - } + m_float_emit.FNEG(VD, result_reg); + else if (result_reg != VD) + m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); - SwitchToNearCode(); - - if (nan_early_fixup) - SetJumpTarget(*nan_early_fixup); + for (FixupBranch fixup : nan_fixups) + SetJumpTarget(fixup); } - // PowerPC's nmadd/nmsub perform rounding before the final negation, which is not the case - // for any of AArch64's FMA instructions, so we negate using a separate instruction. - if (negate_result) - m_float_emit.FNEG(VD, result_reg); - else if (result_reg != VD) - m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); - - for (FixupBranch fixup : nan_fixups) - SetJumpTarget(fixup); - - if (V0Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V0Q); - if (V1Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V1Q); - if (output_is_single) { ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(), @@ -449,43 +446,40 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper) gpr.BindCRToRegister(crf, false); const ARM64Reg XA = gpr.CR(crf); - ARM64Reg fpscr_reg = ARM64Reg::INVALID_REG; + Arm64GPRCache::ScopedARM64Reg fpscr_reg = ARM64Reg::INVALID_REG; if (fprf) { - fpscr_reg = gpr.GetReg(); + fpscr_reg = gpr.GetScopedReg(); LDR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr)); AND(fpscr_reg, fpscr_reg, LogicalImm(~FPCC_MASK, GPRSize::B32)); } - ARM64Reg V0Q = ARM64Reg::INVALID_REG; - ARM64Reg V1Q = ARM64Reg::INVALID_REG; - if (upper_a) { - V0Q = fpr.GetReg(); - m_float_emit.DUP(singles ? 32 : 64, paired_reg_encoder(V0Q), paired_reg_encoder(VA), 1); - VA = reg_encoder(V0Q); - } - if (upper_b) - { - if (a == b) + Arm64FPRCache::ScopedARM64Reg V0Q; + Arm64FPRCache::ScopedARM64Reg V1Q; + if (upper_a) { - VB = VA; + V0Q = fpr.GetScopedReg(); + m_float_emit.DUP(singles ? 32 : 64, paired_reg_encoder(V0Q), paired_reg_encoder(VA), 1); + VA = reg_encoder(V0Q); } - else + if (upper_b) { - V1Q = fpr.GetReg(); - m_float_emit.DUP(singles ? 32 : 64, paired_reg_encoder(V1Q), paired_reg_encoder(VB), 1); - VB = reg_encoder(V1Q); + if (a == b) + { + VB = VA; + } + else + { + V1Q = fpr.GetScopedReg(); + m_float_emit.DUP(singles ? 
32 : 64, paired_reg_encoder(V1Q), paired_reg_encoder(VB), 1); + VB = reg_encoder(V1Q); + } } + + m_float_emit.FCMP(VA, VB); } - m_float_emit.FCMP(VA, VB); - - if (V0Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V0Q); - if (V1Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V1Q); - FixupBranch pNaN, pLesser, pGreater; FixupBranch continue1, continue2, continue3; @@ -538,7 +532,6 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper) if (fprf) { STR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscr_reg); } } @@ -572,7 +565,7 @@ void JitArm64::fctiwx(UGeckoInstruction inst) if (single) { - const ARM64Reg V0 = fpr.GetReg(); + const auto V0 = fpr.GetScopedReg(); if (is_fctiwzx) { @@ -589,12 +582,10 @@ void JitArm64::fctiwx(UGeckoInstruction inst) m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); - - fpr.Unlock(V0); } else { - const ARM64Reg WA = gpr.GetReg(); + const auto WA = gpr.GetScopedReg(); if (is_fctiwzx) { @@ -608,8 +599,6 @@ void JitArm64::fctiwx(UGeckoInstruction inst) ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, GPRSize::B64)); m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA)); - - gpr.Unlock(WA); } ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true), From 942025004663cba4f436a43fdc50887e6f3464b0 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:18:34 +0200 Subject: [PATCH 06/10] JitArm64_LoadStore: Use ScopedARM64Reg --- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 1eae3d923d..8e5cb47940 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -538,9 +538,12 @@ void JitArm64::lmw(UGeckoInstruction inst) else ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); - ARM64Reg addr_base_reg = a_is_addr_base_reg ? ARM64Reg::INVALID_REG : gpr.GetReg(); + Arm64RegCache::ScopedARM64Reg addr_base_reg; if (!a_is_addr_base_reg) + { + addr_base_reg = gpr.GetScopedReg(); MOV(addr_base_reg, addr_reg); + } BitSet32 gprs_to_discard{}; if (!jo.memcheck) @@ -628,8 +631,6 @@ void JitArm64::lmw(UGeckoInstruction inst) gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); if (jo.memcheck || !jo.fastmem) gpr.Unlock(ARM64Reg::W0); - if (!a_is_addr_base_reg) - gpr.Unlock(addr_base_reg); } void JitArm64::stmw(UGeckoInstruction inst) @@ -655,9 +656,12 @@ void JitArm64::stmw(UGeckoInstruction inst) else ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); - ARM64Reg addr_base_reg = a_is_addr_base_reg ? ARM64Reg::INVALID_REG : gpr.GetReg(); + Arm64GPRCache::ScopedARM64Reg addr_base_reg; if (!a_is_addr_base_reg) + { + addr_base_reg = gpr.GetScopedReg(); MOV(addr_base_reg, addr_reg); + } BitSet32 gprs_to_discard{}; if (!jo.memcheck) @@ -748,8 +752,6 @@ void JitArm64::stmw(UGeckoInstruction inst) gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); if (!jo.fastmem) gpr.Unlock(ARM64Reg::W0); - if (!a_is_addr_base_reg) - gpr.Unlock(addr_base_reg); } void JitArm64::dcbx(UGeckoInstruction inst) @@ -786,8 +788,8 @@ void JitArm64::dcbx(UGeckoInstruction inst) // bdnz afterwards! 
So if we invalidate a single cache line, we don't adjust the registers at // all, if we invalidate 2 cachelines we adjust the registers by one step, and so on. - const ARM64Reg reg_cycle_count = gpr.GetReg(); - const ARM64Reg reg_downcount = gpr.GetReg(); + const auto reg_cycle_count = gpr.GetScopedReg(); + const auto reg_downcount = gpr.GetScopedReg(); // Figure out how many loops we want to do. const u8 cycle_count_per_loop = @@ -855,12 +857,9 @@ void JitArm64::dcbx(UGeckoInstruction inst) SetJumpTarget(branch_out); SetJumpTarget(branch_over); } - - gpr.Unlock(reg_cycle_count, reg_downcount); } constexpr ARM64Reg effective_addr = WB; - const ARM64Reg physical_addr = gpr.GetReg(); if (a) ADD(effective_addr, gpr.R(a), gpr.R(b)); @@ -874,6 +873,8 @@ void JitArm64::dcbx(UGeckoInstruction inst) ADD(gpr.R(b), gpr.R(b), WA, ArithOption(WA, ShiftType::LSL, 5)); // Rb += (WA * 32) } + auto physical_addr = gpr.GetScopedReg(); + // Translate effective address to physical address. const u8* loop_start = GetCodePtr(); FixupBranch bat_lookup_failed; @@ -939,7 +940,7 @@ void JitArm64::dcbx(UGeckoInstruction inst) SwitchToNearCode(); SetJumpTarget(near_addr); - gpr.Unlock(WA, WB, physical_addr); + gpr.Unlock(WA, WB); if (make_loop) gpr.Unlock(loop_counter); } From 23327064f6cef33622e79603c2e59d7752bcded9 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:18:46 +0200 Subject: [PATCH 07/10] JitArm64_LoadStoreFloating: Use ScopedARM64Reg --- .../Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 0e471e355b..6bac5dc656 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -268,14 +268,14 @@ void JitArm64::stfXX(UGeckoInstruction inst) const bool have_single = fpr.IsSingle(inst.FS, true); - ARM64Reg V0 = + Arm64FPRCache::ScopedARM64Reg V0 = fpr.R(inst.FS, want_single && have_single ? 
RegType::LowerPairSingle : RegType::LowerPair); if (want_single && !have_single) { - const ARM64Reg single_reg = fpr.GetReg(); + auto single_reg = fpr.GetScopedReg(); ConvertDoubleToSingleLower(inst.FS, single_reg, V0); - V0 = single_reg; + V0 = std::move(single_reg); } gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); @@ -425,9 +425,6 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - if (want_single && !have_single) - fpr.Unlock(V0); - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); if (!jo.fastmem) From 3b251dbb2acdd6a411a8f9d35279a88dfb62ed39 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:18:58 +0200 Subject: [PATCH 08/10] JitArm64_LoadStorePaired: Use ScopedARM64Reg --- .../Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 5d1e561eef..eb8b4d015c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -173,20 +173,21 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) const bool have_single = fpr.IsSingle(inst.RS); - ARM64Reg VS = fpr.R(inst.RS, have_single ? RegType::Single : RegType::Register); + Arm64FPRCache::ScopedARM64Reg VS = + fpr.R(inst.RS, have_single ? RegType::Single : RegType::Register); if (js.assumeNoPairedQuantize) { if (!have_single) { - const ARM64Reg single_reg = fpr.GetReg(); + auto single_reg = fpr.GetScopedReg(); if (w) m_float_emit.FCVT(32, 64, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS)); else m_float_emit.FCVTN(32, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS)); - VS = single_reg; + VS = std::move(single_reg); } } else @@ -279,9 +280,6 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOV(gpr.R(inst.RA), addr_reg); } - if (js.assumeNoPairedQuantize && !have_single) - fpr.Unlock(VS); - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize || !jo.fastmem) From be2b4667434d6adc818493d28c182b24370b13a1 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:20:32 +0200 Subject: [PATCH 09/10] JitArm64_Paired: Use ScopedARM64Reg --- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 424 +++++++++--------- 1 file changed, 211 insertions(+), 213 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 23a06e48ca..900d9c87c1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -108,201 +108,196 @@ void JitArm64::ps_arith(UGeckoInstruction inst) const ARM64Reg VC = use_c ? 
reg_encoder(fpr.R(c, type)) : ARM64Reg::INVALID_REG; const ARM64Reg VD = reg_encoder(fpr.RW(d, type)); - ARM64Reg V0Q = ARM64Reg::INVALID_REG; - ARM64Reg V1Q = ARM64Reg::INVALID_REG; - ARM64Reg V2Q = ARM64Reg::INVALID_REG; - - ARM64Reg rounded_c_reg = VC; - if (round_c) { - ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single"); + Arm64FPRCache::ScopedARM64Reg V0Q = ARM64Reg::INVALID_REG; + Arm64FPRCache::ScopedARM64Reg V1Q = ARM64Reg::INVALID_REG; + Arm64FPRCache::ScopedARM64Reg V2Q = ARM64Reg::INVALID_REG; - V0Q = fpr.GetReg(); - rounded_c_reg = reg_encoder(V0Q); - Force25BitPrecision(rounded_c_reg, VC); - } - - ARM64Reg inaccurate_fma_reg = VD; - if (fma && inaccurate_fma && VD == VB) - { - if (V0Q == ARM64Reg::INVALID_REG) - V0Q = fpr.GetReg(); - inaccurate_fma_reg = reg_encoder(V0Q); - } - - ARM64Reg result_reg = VD; - const bool need_accurate_fma_reg = - fma && !inaccurate_fma && (msub || VD != VB) && (VD == VA || VD == rounded_c_reg); - const bool preserve_d = - m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC)); - if (need_accurate_fma_reg || preserve_d) - { - V1Q = fpr.GetReg(); - result_reg = reg_encoder(V1Q); - } - - if (m_accurate_nans) - { - if (V0Q == ARM64Reg::INVALID_REG) - V0Q = fpr.GetReg(); - - if (duplicated_c || VD == result_reg) - V2Q = fpr.GetReg(); - } - - switch (op5) - { - case 12: // ps_muls0: d = a * c.ps0 - m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg, 0); - break; - case 13: // ps_muls1: d = a * c.ps1 - m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg, 1); - break; - case 14: // ps_madds0: d = a * c.ps0 + b - if (inaccurate_fma) + ARM64Reg rounded_c_reg = VC; + if (round_c) { - m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg, 0); - m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); - } - else - { - if (result_reg != VB) - m_float_emit.MOV(result_reg, VB); - m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg, 0); - } - break; - case 15: // ps_madds1: d = a * c.ps1 + b - if (inaccurate_fma) - { - m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg, 1); - m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); - } - else - { - if (result_reg != VB) - m_float_emit.MOV(result_reg, VB); - m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg, 1); - } - break; - case 18: // ps_div - m_float_emit.FDIV(size, result_reg, VA, VB); - break; - case 20: // ps_sub - m_float_emit.FSUB(size, result_reg, VA, VB); - break; - case 21: // ps_add - m_float_emit.FADD(size, result_reg, VA, VB); - break; - case 25: // ps_mul - m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg); - break; - case 28: // ps_msub: d = a * c - b - case 30: // ps_nmsub: d = -(a * c - b) - if (inaccurate_fma) - { - m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg); - m_float_emit.FSUB(size, result_reg, inaccurate_fma_reg, VB); - } - else - { - m_float_emit.FNEG(size, result_reg, VB); - m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg); - } - break; - case 29: // ps_madd: d = a * c + b - case 31: // ps_nmadd: d = -(a * c + b) - if (inaccurate_fma) - { - m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg); - m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); - } - else - { - if (result_reg != VB) - m_float_emit.MOV(result_reg, VB); - m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg); - } - break; - default: - ASSERT_MSG(DYNA_REC, 0, "ps_arith - invalid op"); - break; - } + ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single"); - FixupBranch 
nan_fixup; - if (m_accurate_nans) - { - const ARM64Reg nan_temp_reg = singles ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q); - const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q); - - // Check if we need to handle NaNs - - m_float_emit.FMAXP(nan_temp_reg, result_reg); - m_float_emit.FCMP(nan_temp_reg); - FixupBranch no_nan = B(CCFlags::CC_VC); - FixupBranch nan = B(); - SetJumpTarget(no_nan); - - SwitchToFarCode(); - SetJumpTarget(nan); - - // Pick the right NaNs - - const auto check_input = [&](ARM64Reg input) { - m_float_emit.FCMEQ(size, nan_temp_reg_paired, input, input); - m_float_emit.BIF(result_reg, input, nan_temp_reg_paired); - }; - - ARM64Reg c_reg_for_nan_purposes = VC; - if (duplicated_c) - { - c_reg_for_nan_purposes = reg_encoder(V2Q); - m_float_emit.DUP(size, c_reg_for_nan_purposes, VC, op5 & 0x1); + V0Q = fpr.GetScopedReg(); + rounded_c_reg = reg_encoder(V0Q); + Force25BitPrecision(rounded_c_reg, VC); } - if (use_c) - check_input(c_reg_for_nan_purposes); + ARM64Reg inaccurate_fma_reg = VD; + if (fma && inaccurate_fma && VD == VB) + { + if (V0Q == ARM64Reg::INVALID_REG) + V0Q = fpr.GetScopedReg(); + inaccurate_fma_reg = reg_encoder(V0Q); + } - if (use_b && (!use_c || VB != c_reg_for_nan_purposes)) - check_input(VB); + ARM64Reg result_reg = VD; + const bool need_accurate_fma_reg = + fma && !inaccurate_fma && (msub || VD != VB) && (VD == VA || VD == rounded_c_reg); + const bool preserve_d = + m_accurate_nans && (VD == VA || (use_b && VD == VB) || (use_c && VD == VC)); + if (need_accurate_fma_reg || preserve_d) + { + V1Q = fpr.GetScopedReg(); + result_reg = reg_encoder(V1Q); + } - if ((!use_b || VA != VB) && (!use_c || VA != c_reg_for_nan_purposes)) - check_input(VA); + if (m_accurate_nans) + { + if (V0Q == ARM64Reg::INVALID_REG) + V0Q = fpr.GetScopedReg(); - // Make the NaNs quiet + if (duplicated_c || VD == result_reg) + V2Q = fpr.GetScopedReg(); + } - const ARM64Reg quiet_nan_reg = VD == result_reg ? 
reg_encoder(V2Q) : VD; + switch (op5) + { + case 12: // ps_muls0: d = a * c.ps0 + m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg, 0); + break; + case 13: // ps_muls1: d = a * c.ps1 + m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg, 1); + break; + case 14: // ps_madds0: d = a * c.ps0 + b + if (inaccurate_fma) + { + m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg, 0); + m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); + } + else + { + if (result_reg != VB) + m_float_emit.MOV(result_reg, VB); + m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg, 0); + } + break; + case 15: // ps_madds1: d = a * c.ps1 + b + if (inaccurate_fma) + { + m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg, 1); + m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); + } + else + { + if (result_reg != VB) + m_float_emit.MOV(result_reg, VB); + m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg, 1); + } + break; + case 18: // ps_div + m_float_emit.FDIV(size, result_reg, VA, VB); + break; + case 20: // ps_sub + m_float_emit.FSUB(size, result_reg, VA, VB); + break; + case 21: // ps_add + m_float_emit.FADD(size, result_reg, VA, VB); + break; + case 25: // ps_mul + m_float_emit.FMUL(size, result_reg, VA, rounded_c_reg); + break; + case 28: // ps_msub: d = a * c - b + case 30: // ps_nmsub: d = -(a * c - b) + if (inaccurate_fma) + { + m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg); + m_float_emit.FSUB(size, result_reg, inaccurate_fma_reg, VB); + } + else + { + m_float_emit.FNEG(size, result_reg, VB); + m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg); + } + break; + case 29: // ps_madd: d = a * c + b + case 31: // ps_nmadd: d = -(a * c + b) + if (inaccurate_fma) + { + m_float_emit.FMUL(size, inaccurate_fma_reg, VA, rounded_c_reg); + m_float_emit.FADD(size, result_reg, inaccurate_fma_reg, VB); + } + else + { + if (result_reg != VB) + m_float_emit.MOV(result_reg, VB); + m_float_emit.FMLA(size, result_reg, VA, rounded_c_reg); + } + break; + default: + ASSERT_MSG(DYNA_REC, 0, "ps_arith - invalid op"); + break; + } - m_float_emit.FADD(size, quiet_nan_reg, result_reg, result_reg); - m_float_emit.FCMEQ(size, nan_temp_reg_paired, result_reg, result_reg); + FixupBranch nan_fixup; + if (m_accurate_nans) + { + const ARM64Reg nan_temp_reg = singles ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q); + const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q); + + // Check if we need to handle NaNs + + m_float_emit.FMAXP(nan_temp_reg, result_reg); + m_float_emit.FCMP(nan_temp_reg); + FixupBranch no_nan = B(CCFlags::CC_VC); + FixupBranch nan = B(); + SetJumpTarget(no_nan); + + SwitchToFarCode(); + SetJumpTarget(nan); + + // Pick the right NaNs + + const auto check_input = [&](ARM64Reg input) { + m_float_emit.FCMEQ(size, nan_temp_reg_paired, input, input); + m_float_emit.BIF(result_reg, input, nan_temp_reg_paired); + }; + + ARM64Reg c_reg_for_nan_purposes = VC; + if (duplicated_c) + { + c_reg_for_nan_purposes = reg_encoder(V2Q); + m_float_emit.DUP(size, c_reg_for_nan_purposes, VC, op5 & 0x1); + } + + if (use_c) + check_input(c_reg_for_nan_purposes); + + if (use_b && (!use_c || VB != c_reg_for_nan_purposes)) + check_input(VB); + + if ((!use_b || VA != VB) && (!use_c || VA != c_reg_for_nan_purposes)) + check_input(VA); + + // Make the NaNs quiet + + const ARM64Reg quiet_nan_reg = VD == result_reg ? 
reg_encoder(V2Q) : VD; + + m_float_emit.FADD(size, quiet_nan_reg, result_reg, result_reg); + m_float_emit.FCMEQ(size, nan_temp_reg_paired, result_reg, result_reg); + if (negate_result) + m_float_emit.FNEG(size, result_reg, result_reg); + if (VD == result_reg) + m_float_emit.BIF(VD, quiet_nan_reg, nan_temp_reg_paired); + else // quiet_nan_reg == VD + m_float_emit.BIT(VD, result_reg, nan_temp_reg_paired); + + nan_fixup = B(); + + SwitchToNearCode(); + } + + // PowerPC's nmadd/nmsub perform rounding before the final negation, which is not the case + // for any of AArch64's FMA instructions, so we negate using a separate instruction. if (negate_result) - m_float_emit.FNEG(size, result_reg, result_reg); - if (VD == result_reg) - m_float_emit.BIF(VD, quiet_nan_reg, nan_temp_reg_paired); - else // quiet_nan_reg == VD - m_float_emit.BIT(VD, result_reg, nan_temp_reg_paired); + m_float_emit.FNEG(size, VD, result_reg); + else if (result_reg != VD) + m_float_emit.MOV(VD, result_reg); - nan_fixup = B(); - - SwitchToNearCode(); + if (m_accurate_nans) + SetJumpTarget(nan_fixup); } - // PowerPC's nmadd/nmsub perform rounding before the final negation, which is not the case - // for any of AArch64's FMA instructions, so we negate using a separate instruction. - if (negate_result) - m_float_emit.FNEG(size, VD, result_reg); - else if (result_reg != VD) - m_float_emit.MOV(VD, result_reg); - - if (m_accurate_nans) - SetJumpTarget(nan_fixup); - - if (V0Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V0Q); - if (V1Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V1Q); - if (V2Q != ARM64Reg::INVALID_REG) - fpr.Unlock(V2Q); - ASSERT_MSG(DYNA_REC, singles == singles_func(), "Register allocation turned singles into doubles in the middle of ps_arith"); @@ -339,12 +334,11 @@ void JitArm64::ps_sel(UGeckoInstruction inst) } else { - const ARM64Reg V0Q = fpr.GetReg(); + const auto V0Q = fpr.GetScopedReg(); const ARM64Reg V0 = reg_encoder(V0Q); m_float_emit.FCMGE(size, V0, VA); m_float_emit.BSL(V0, VC, VB); m_float_emit.MOV(VD, V0); - fpr.Unlock(V0Q); } ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), @@ -375,41 +369,45 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) const ARM64Reg VB = fpr.R(b, type); const ARM64Reg VC = fpr.R(c, type); const ARM64Reg VD = fpr.RW(d, type); - const ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VB), 1); + { + const auto V0 = fpr.GetScopedReg(); - if (m_accurate_nans) - { - // If the first input is NaN, set the temp register for the second input to 0. This is because: - // - // - If the second input is also NaN, setting it to 0 ensures that the first NaN will be picked. - // - If only the first input is NaN, setting the second input to 0 has no effect on the result. - // - // Either way, we can then do an FADD as usual, and the FADD will make the NaN quiet. 
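  // A standalone sketch of the trick described above, in plain C++ doubles
  // rather than the emitted FCMP/MOVI/FADD sequence (helper name hypothetical):
  //
  #include <cmath>

  // If the first input is NaN, zero the second input so the addition below can
  // only propagate (and quiet) the first NaN; if the first input is not NaN,
  // the zeroing never happens and the result is unaffected.
  static double SumWithFirstNaNPriority(double a, double b)
  {
    if (std::isnan(a))
      b = 0.0;     // mirrors the MOVI of #0 into the temp register
    return a + b;  // mirrors the FADD, which also quiets a signaling NaN
  }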
- m_float_emit.FCMP(scalar_reg_encoder(VA)); - FixupBranch a_not_nan = B(CCFlags::CC_VC); - m_float_emit.MOVI(64, scalar_reg_encoder(V0), 0); - SetJumpTarget(a_not_nan); - } + m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VB), 1); - if (upper) - { - m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); - m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0)); - } - else if (d != c) - { - m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); - m_float_emit.INS(size, VD, 1, VC, 1); - } - else - { - m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); - m_float_emit.INS(size, VD, 0, V0, 0); - } + if (m_accurate_nans) + { + // If the first input is NaN, set the temp register for the second input to 0. This is + // because: + // + // - If the second input is also NaN, setting it to 0 ensures that the first NaN will be + // picked. + // - If only the first input is NaN, setting the second input to 0 has no effect on the + // result. + // + // Either way, we can then do an FADD as usual, and the FADD will make the NaN quiet. + m_float_emit.FCMP(scalar_reg_encoder(VA)); + FixupBranch a_not_nan = B(CCFlags::CC_VC); + m_float_emit.MOVI(64, scalar_reg_encoder(V0), 0); + SetJumpTarget(a_not_nan); + } - fpr.Unlock(V0); + if (upper) + { + m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); + m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0)); + } + else if (d != c) + { + m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); + m_float_emit.INS(size, VD, 1, VC, 1); + } + else + { + m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(V0), scalar_reg_encoder(VA)); + m_float_emit.INS(size, VD, 0, V0, 0); + } + } ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), "Register allocation turned singles into doubles in the middle of ps_sumX"); From defb2d65a66f3307eee22065cabec341243321c9 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sun, 23 Jun 2024 23:21:07 +0200 Subject: [PATCH 10/10] JitArm64_SystemRegisters: Use ScopedARM64Reg --- .../JitArm64/JitArm64_SystemRegisters.cpp | 345 ++++++++---------- 1 file changed, 155 insertions(+), 190 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 8ffa1b84aa..df4ea4931a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -48,17 +48,16 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg) // if the internal representation either has bit 63 set or has all bits set to zero. // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT // doesn't accidentally become considered set. Gross but necessary; this can break actual games. 
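  // The same invariant as a standalone C++ sketch (name hypothetical; the
  // ORR/CMP/CSEL sequence below is the branchless emitted form of this):
  //
  #include <cstdint>

  // GT reads as set only when bit 63 is clear and the value is nonzero, so an
  // all-zero field must gain bit 63 before an unrelated flag bit is ORed in,
  // or the field would suddenly read as "GT set".
  static uint64_t FixGTBeforeSettingUnrelatedBit(uint64_t cr)
  {
    return cr == 0 ? (1ULL << 63) : cr;
  }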
- ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); CMP(reg, ARM64Reg::ZR); CSEL(reg, reg, XA, CC_NEQ); - gpr.Unlock(WA); } void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) { - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); // fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0 MOVI2R(WA, FPSCR_VX_ANY); @@ -71,8 +70,6 @@ void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) TST(WA, fpscr, ArithOption(fpscr, ShiftType::LSR, 22)); CSET(WA, CCFlags::CC_NEQ); BFI(fpscr, WA, MathUtil::IntLog2(FPSCR_FEX), 1); - - gpr.Unlock(WA); } void JitArm64::UpdateRoundingMode() @@ -135,7 +132,7 @@ void JitArm64::mcrxr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindCRToRegister(inst.CRFD, false); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XB = gpr.CR(inst.CRFD); ARM64Reg WB = EncodeRegTo32(XB); @@ -155,8 +152,6 @@ void JitArm64::mcrxr(UGeckoInstruction inst) // Clear XER[0-3] static_assert(PPCSTATE_OFF(xer_ca) + 1 == PPCSTATE_OFF(xer_so_ov)); STRH(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(xer_ca)); - - gpr.Unlock(WA); } void JitArm64::mfsr(UGeckoInstruction inst) @@ -186,14 +181,12 @@ void JitArm64::mfsrin(UGeckoInstruction inst) ARM64Reg RB = gpr.R(b); ARM64Reg RD = gpr.R(d); - ARM64Reg index = gpr.GetReg(); + auto index = gpr.GetScopedReg(); ARM64Reg addr = EncodeRegTo64(RD); UBFM(index, RB, 28, 31); ADDI2R(addr, PPC_REG, PPCSTATE_OFF_SR(0), addr); LDR(RD, addr, ArithOption(EncodeRegTo64(index), true)); - - gpr.Unlock(index); } void JitArm64::mtsrin(UGeckoInstruction inst) @@ -206,14 +199,12 @@ void JitArm64::mtsrin(UGeckoInstruction inst) ARM64Reg RB = gpr.R(b); ARM64Reg RD = gpr.R(d); - ARM64Reg index = gpr.GetReg(); - ARM64Reg addr = gpr.GetReg(); + auto index = gpr.GetScopedReg(); + auto addr = gpr.GetScopedReg(); UBFM(index, RB, 28, 31); ADDI2R(EncodeRegTo64(addr), PPC_REG, PPCSTATE_OFF_SR(0), EncodeRegTo64(addr)); STR(RD, EncodeRegTo64(addr), ArithOption(EncodeRegTo64(index), true)); - - gpr.Unlock(index, addr); } void JitArm64::twx(UGeckoInstruction inst) @@ -223,7 +214,7 @@ void JitArm64::twx(UGeckoInstruction inst) s32 a = inst.RA; - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); if (inst.OPCD == 3) // twi { @@ -278,8 +269,6 @@ void JitArm64::twx(UGeckoInstruction inst) fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); WriteExit(js.compilerPC + 4); } - - gpr.Unlock(WA); } void JitArm64::mfspr(UGeckoInstruction inst) @@ -294,19 +283,19 @@ void JitArm64::mfspr(UGeckoInstruction inst) case SPR_TL: case SPR_TU: { - ARM64Reg Wg = gpr.GetReg(); + auto Wg = gpr.GetScopedReg(); ARM64Reg Xg = EncodeRegTo64(Wg); - ARM64Reg Wresult = gpr.GetReg(); + auto Wresult = gpr.GetScopedReg(); ARM64Reg Xresult = EncodeRegTo64(Wresult); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); + auto WB = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XB = EncodeRegTo64(WB); - ARM64Reg VC = fpr.GetReg(); - ARM64Reg VD = fpr.GetReg(); + auto VC = fpr.GetScopedReg(); + auto VD = fpr.GetScopedReg(); ARM64Reg SC = EncodeRegToSingle(VC); ARM64Reg SD = EncodeRegToSingle(VD); @@ -371,8 +360,6 @@ void JitArm64::mfspr(UGeckoInstruction inst) else LSR(EncodeRegTo64(gpr.R(n)), Xresult, 32); - gpr.Unlock(Wg, Wresult, WA, WB); - fpr.Unlock(VC, VD); break; } } @@ -381,22 +368,18 @@ void JitArm64::mfspr(UGeckoInstruction inst) LSR(EncodeRegTo64(gpr.R(d)), Xresult, 32); else 
MOV(gpr.R(d), Wresult); - - gpr.Unlock(Wg, Wresult, WA, WB); - fpr.Unlock(VC, VD); } break; case SPR_XER: { gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); LDRH(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_stringctrl)); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ORR(RD, RD, WA, ArithOption(WA, ShiftType::LSL, XER_CA_SHIFT)); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); ORR(RD, RD, WA, ArithOption(WA, ShiftType::LSL, XER_OV_SHIFT)); - gpr.Unlock(WA); } break; case SPR_WPAR: @@ -462,14 +445,13 @@ void JitArm64::mtspr(UGeckoInstruction inst) case SPR_XER: { ARM64Reg RD = gpr.R(inst.RD); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); AND(WA, RD, LogicalImm(0xFFFFFF7F, GPRSize::B32)); STRH(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl)); UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1); STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); UBFM(WA, RD, XER_OV_SHIFT, 31); // Same as WA = RD >> XER_OV_SHIFT STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); - gpr.Unlock(WA); } break; default: @@ -553,114 +535,112 @@ void JitArm64::crXXX(UGeckoInstruction inst) return; } - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - ARM64Reg WB = gpr.GetReg(); - ARM64Reg XB = EncodeRegTo64(WB); - - // creqv or crnand or crnor - bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // crandc or crorc or crnand or crnor - bool negateB = - inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - - // GetCRFieldBit - for (int i = 0; i < 2; i++) { - int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; - int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); - ARM64Reg out = i ? XB : XA; - bool negate = i ? negateB : negateA; + auto WB = gpr.GetScopedReg(); + ARM64Reg XB = EncodeRegTo64(WB); - ARM64Reg XC = gpr.CR(field); - ARM64Reg WC = EncodeRegTo32(XC); - switch (bit) + // creqv or crnand or crnor + bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; + // crandc or crorc or crnand or crnor + bool negateB = + inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; + + // GetCRFieldBit + for (int i = 0; i < 2; i++) { - case PowerPC::CR_SO_BIT: // check bit 59 set - UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; + int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; + int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); + ARM64Reg out = i ? XB : XA; + bool negate = i ? negateB : negateA; - case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 - CMP(WC, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); - break; + ARM64Reg XC = gpr.CR(field); + ARM64Reg WC = EncodeRegTo32(XC); + switch (bit) + { + case PowerPC::CR_SO_BIT: // check bit 59 set + UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; - case PowerPC::CR_GT_BIT: // check val > 0 - CMP(XC, ARM64Reg::ZR); - CSET(out, negate ? CC_LE : CC_GT); - break; + case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 + CMP(WC, ARM64Reg::WZR); + CSET(out, negate ? 
CC_NEQ : CC_EQ); + break; - case PowerPC::CR_LT_BIT: // check bit 62 set - UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; + case PowerPC::CR_GT_BIT: // check val > 0 + CMP(XC, ARM64Reg::ZR); + CSET(out, negate ? CC_LE : CC_GT); + break; - default: - ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); + case PowerPC::CR_LT_BIT: // check bit 62 set + UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + default: + ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); + } } - } - // Compute combined bit - switch (inst.SUBOP10) - { - case 33: // crnor: ~(A || B) == (~A && ~B) - case 129: // crandc: A && ~B - case 257: // crand: A && B - AND(XA, XA, XB); - break; + // Compute combined bit + switch (inst.SUBOP10) + { + case 33: // crnor: ~(A || B) == (~A && ~B) + case 129: // crandc: A && ~B + case 257: // crand: A && B + AND(XA, XA, XB); + break; - case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) = ~A ^ B - EOR(XA, XA, XB); - break; + case 193: // crxor: A ^ B + case 289: // creqv: ~(A ^ B) = ~A ^ B + EOR(XA, XA, XB); + break; - case 225: // crnand: ~(A && B) == (~A || ~B) - case 417: // crorc: A || ~B - case 449: // cror: A || B - ORR(XA, XA, XB); - break; + case 225: // crnand: ~(A && B) == (~A || ~B) + case 417: // crorc: A || ~B + case 449: // cror: A || B + ORR(XA, XA, XB); + break; + } } // Store result bit in CRBD int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - gpr.Unlock(WB); - WB = ARM64Reg::INVALID_REG; gpr.BindCRToRegister(field, true); - XB = gpr.CR(field); + ARM64Reg CR = gpr.CR(field); if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(XB); + FixGTBeforeSettingCRFieldBit(CR); switch (bit) { case PowerPC::CR_SO_BIT: // set bit 59 to input - BFI(XB, XA, PowerPC::CR_EMU_SO_BIT, 1); + BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1); break; case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - AND(XB, XB, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - ORR(XB, XB, XA); + ORR(CR, CR, XA); break; case PowerPC::CR_GT_BIT: // set bit 63 to !input EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - BFI(XB, XA, 63, 1); + BFI(CR, XA, 63, 1); break; case PowerPC::CR_LT_BIT: // set bit 62 to input - BFI(XB, XA, PowerPC::CR_EMU_LT_BIT, 1); + BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1); break; } - ORR(XB, XB, LogicalImm(1ULL << 32, GPRSize::B64)); - - gpr.Unlock(WA); + ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); } void JitArm64::mfcr(UGeckoInstruction inst) @@ -670,8 +650,8 @@ void JitArm64::mfcr(UGeckoInstruction inst) gpr.BindToRegister(inst.RD, false); ARM64Reg WA = gpr.R(inst.RD); - ARM64Reg WB = gpr.GetReg(); - ARM64Reg WC = gpr.GetReg(); + auto WB = gpr.GetScopedReg(); + auto WC = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XB = EncodeRegTo64(WB); ARM64Reg XC = EncodeRegTo64(WC); @@ -716,8 +696,6 @@ void JitArm64::mfcr(UGeckoInstruction inst) else if (!js.op->crInUse[i]) gpr.StoreCRRegisters(BitSet8{i}, WC); } - - gpr.Unlock(WB, WC); } void JitArm64::mtcrf(UGeckoInstruction inst) @@ -729,7 +707,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst) if (crm != 0) { ARM64Reg RS = gpr.R(inst.RS); - ARM64Reg WB = gpr.GetReg(); + auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); MOVP2R(XB, PowerPC::ConditionRegister::s_crTable.data()); for (int i = 0; i < 8; ++i) @@ -753,7 +731,6 @@ void 
JitArm64::mtcrf(UGeckoInstruction inst) LDR(CR, XB, ArithOption(CR, true)); } } - gpr.Unlock(WB); } } @@ -771,7 +748,7 @@ void JitArm64::mcrfs(UGeckoInstruction inst) gpr.BindCRToRegister(field, false); ARM64Reg CR = gpr.CR(field); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg WCR = EncodeRegTo32(CR); ARM64Reg XA = EncodeRegTo64(WA); @@ -789,8 +766,6 @@ void JitArm64::mcrfs(UGeckoInstruction inst) MOVP2R(XA, PowerPC::ConditionRegister::s_crTable.data()); LDR(CR, XA, ArithOption(CR, true)); - - gpr.Unlock(WA); } void JitArm64::mffsx(UGeckoInstruction inst) @@ -799,7 +774,7 @@ void JitArm64::mffsx(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); @@ -808,8 +783,6 @@ void JitArm64::mffsx(UGeckoInstruction inst) ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, GPRSize::B64)); m_float_emit.FMOV(EncodeRegToDouble(VD), XA); - - gpr.Unlock(WA); } void JitArm64::mtfsb0x(UGeckoInstruction inst) @@ -824,17 +797,17 @@ void JitArm64::mtfsb0x(UGeckoInstruction inst) if (mask == FPSCR_FEX || mask == FPSCR_VX) return; - ARM64Reg WA = gpr.GetReg(); + { + auto WA = gpr.GetScopedReg(); - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); + AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); - if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) - UpdateFPExceptionSummary(WA); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - gpr.Unlock(WA); + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + } if (inst.CRBD >= 29) UpdateRoundingMode(); @@ -852,25 +825,24 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst) if (mask == FPSCR_FEX || mask == FPSCR_VX) return; - ARM64Reg WA = gpr.GetReg(); - - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - if ((mask & FPSCR_ANY_X) != 0) { - ARM64Reg WB = gpr.GetReg(); - TST(WA, LogicalImm(mask, GPRSize::B32)); - ORR(WB, WA, LogicalImm(1 << 31, GPRSize::B32)); - CSEL(WA, WA, WB, CCFlags::CC_NEQ); - gpr.Unlock(WB); + auto WA = gpr.GetScopedReg(); + + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + + if ((mask & FPSCR_ANY_X) != 0) + { + auto WB = gpr.GetScopedReg(); + TST(WA, LogicalImm(mask, GPRSize::B32)); + ORR(WB, WA, LogicalImm(1 << 31, GPRSize::B32)); + CSEL(WA, WA, WB, CCFlags::CC_NEQ); + } + ORR(WA, WA, LogicalImm(mask, GPRSize::B32)); + + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); } - ORR(WA, WA, LogicalImm(mask, GPRSize::B32)); - - if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) - UpdateFPExceptionSummary(WA); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - gpr.Unlock(WA); if (inst.CRBD >= 29) UpdateRoundingMode(); @@ -887,32 +859,31 @@ void JitArm64::mtfsfix(UGeckoInstruction inst) u8 shift = 28 - 4 * inst.CRFD; u32 mask = 0xF << shift; - ARM64Reg WA = gpr.GetReg(); - - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - if (imm == 0xF) { - ORR(WA, WA, LogicalImm(mask, GPRSize::B32)); - } - else if (imm == 0x0) - { - const u32 inverted_mask = ~mask; - AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); - } - else - { - ARM64Reg WB = gpr.GetReg(); - MOVZ(WB, imm); - BFI(WA, WB, shift, 
4); - gpr.Unlock(WB); - } + auto WA = gpr.GetScopedReg(); - if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) - UpdateFPExceptionSummary(WA); - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - gpr.Unlock(WA); + if (imm == 0xF) + { + ORR(WA, WA, LogicalImm(mask, GPRSize::B32)); + } + else if (imm == 0x0) + { + const u32 inverted_mask = ~mask; + AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); + } + else + { + auto WB = gpr.GetScopedReg(); + MOVZ(WB, imm); + BFI(WA, WB, shift, 4); + } + + if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + } // Field 7 contains NI and RN. if (inst.CRFD == 7) @@ -936,49 +907,43 @@ void JitArm64::mtfsfx(UGeckoInstruction inst) if (mask == 0xFFFFFFFF) { ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); - ARM64Reg WA = gpr.GetReg(); + auto WA = gpr.GetScopedReg(); m_float_emit.FMOV(WA, EncodeRegToSingle(VB)); UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - gpr.Unlock(WA); } else if (mask != 0) { ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = gpr.GetReg(); - - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - m_float_emit.FMOV(WB, EncodeRegToSingle(VB)); - - if (LogicalImm imm = LogicalImm(mask, GPRSize::B32)) + auto WA = gpr.GetScopedReg(); { - const u32 inverted_mask = ~mask; - AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); - AND(WB, WB, imm); + auto WB = gpr.GetScopedReg(); + + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + m_float_emit.FMOV(WB, EncodeRegToSingle(VB)); + + if (LogicalImm imm = LogicalImm(mask, GPRSize::B32)) + { + const u32 inverted_mask = ~mask; + AND(WA, WA, LogicalImm(inverted_mask, GPRSize::B32)); + AND(WB, WB, imm); + } + else + { + auto WC = gpr.GetScopedReg(); + + MOVI2R(WC, mask); + BIC(WA, WA, WC); + AND(WB, WB, WC); + } + ORR(WA, WA, WB); } - else - { - ARM64Reg WC = gpr.GetReg(); - - MOVI2R(WC, mask); - BIC(WA, WA, WC); - AND(WB, WB, WC); - - gpr.Unlock(WC); - } - ORR(WA, WA, WB); - - gpr.Unlock(WB); if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - gpr.Unlock(WA); } if (inst.FM & 1)