diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index f719d9aef6..91ad58d9f0 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1542,19 +1542,22 @@ void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
 }
 void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
 {
-  ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, shift));
+  int bits = Is64Bit(Rd) ? 64 : 32;
+  UBFM(Rd, Rm, (bits - shift) & (bits - 1), bits - shift - 1);
 }
 void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
 {
-  ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSR, shift));
+  int bits = Is64Bit(Rd) ? 64 : 32;
+  UBFM(Rd, Rm, shift, bits - 1);
 }
 void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
 {
-  ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ASR, shift));
+  int bits = Is64Bit(Rd) ? 64 : 32;
+  SBFM(Rd, Rm, shift, bits - 1);
 }
 void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
 {
-  ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ROR, shift));
+  EXTR(Rd, Rm, Rm, shift);
 }
 
 // Logical (immediate)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index fa77737161..4b5bc9f137 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -721,7 +721,7 @@ public:
   void MOV(ARM64Reg Rd, ARM64Reg Rm);
   void MVN(ARM64Reg Rd, ARM64Reg Rm);
 
-  // TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
+  // Convenience wrappers around UBFM/EXTR.
   void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
   void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
   void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 64906d7804..8b6993eceb 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -196,14 +196,14 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
     {
       m_float_emit.FCVTN(32, D0, RS);
       m_float_emit.UMOV(64, X0, D0, 0);
-      ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
+      ROR(X0, X0, 32);
       MOVP2R(X30, &PowerPC::Write_U64);
       BLR(X30);
     }
     else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
     {
       m_float_emit.UMOV(64, X0, RS, 0);
-      ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
+      ROR(X0, X0, 32);
       MOVP2R(X30, &PowerPC::Write_U64);
       BLR(X30);
     }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index 41b06c871e..11b267d2f7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -532,11 +532,28 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)
 
   gpr.BindToRegister(a, a == s);
 
-  ARM64Reg WA = gpr.GetReg();
-  ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
-  MOVI2R(WA, mask);
-  AND(gpr.R(a), WA, gpr.R(s), Shift);
-  gpr.Unlock(WA);
+  if (!inst.SH)
+  {
+    // Immediate mask
+    ANDI2R(gpr.R(a), gpr.R(s), mask);
+  }
+  else if (inst.ME == 31 && 31 < inst.SH + inst.MB)
+  {
+    // Bit select of the upper part
+    UBFX(gpr.R(a), gpr.R(s), 32 - inst.SH, 32 - inst.MB);
+  }
+  else if (inst.ME == 31 - inst.SH && 32 > inst.SH + inst.MB)
+  {
+    // Bit select of the lower part
+    UBFIZ(gpr.R(a), gpr.R(s), inst.SH, 32 - inst.SH - inst.MB);
+  }
+  else
+  {
+    ARM64Reg WA = gpr.GetReg();
+    MOVI2R(WA, mask);
+    AND(gpr.R(a), WA, gpr.R(s), ArithOption(gpr.R(s), ST_ROR, 32 - inst.SH));
+    gpr.Unlock(WA);
+  }
 
   if (inst.Rc)
     ComputeRC0(gpr.R(a));
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index c1e08bdf29..b4e057753e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -499,8 +499,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
       LDP(INDEX_POST, EncodeRegTo64(RX1), EncodeRegTo64(RX3), XA, 16);
       REV32(EncodeRegTo64(RX1), EncodeRegTo64(RX1));
       REV32(EncodeRegTo64(RX3), EncodeRegTo64(RX3));
-      ORR(EncodeRegTo64(RX2), ZR, EncodeRegTo64(RX1), ArithOption(EncodeRegTo64(RX1), ST_LSR, 32));
-      ORR(EncodeRegTo64(RX4), ZR, EncodeRegTo64(RX3), ArithOption(EncodeRegTo64(RX3), ST_LSR, 32));
+      LSR(EncodeRegTo64(RX2), EncodeRegTo64(RX1), 32);
+      LSR(EncodeRegTo64(RX4), EncodeRegTo64(RX3), 32);
       i += 3;
     }
     else if (remaining >= 2)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 0a06ac51a1..f5071e71bc 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -300,12 +300,12 @@ void JitArm64::mfspr(UGeckoInstruction inst)
       if (iIndex == SPR_TL)
         MOV(gpr.R(d), Wresult);
       else
-        ORR(EncodeRegTo64(gpr.R(d)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
+        LSR(EncodeRegTo64(gpr.R(d)), Xresult, 32);
 
       if (nextIndex == SPR_TL)
         MOV(gpr.R(n), Wresult);
       else
-        ORR(EncodeRegTo64(gpr.R(n)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
+        LSR(EncodeRegTo64(gpr.R(n)), Xresult, 32);
 
       gpr.Unlock(Wg, Wresult, WA, WB);
       fpr.Unlock(VC, VD);
@@ -314,7 +314,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
 
     gpr.BindToRegister(d, false);
     if (iIndex == SPR_TU)
-      ORR(EncodeRegTo64(gpr.R(d)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
+      LSR(EncodeRegTo64(gpr.R(d)), Xresult, 32);
     else
       MOV(gpr.R(d), Wresult);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 603c36d38c..316336b667 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -374,7 +374,7 @@ void JitArm64::GenerateCommonAsm()
 
     storePairedFloatSlow = GetCodePtr();
     float_emit.UMOV(64, X0, Q0, 0);
-    ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
+    ROR(X0, X0, 32);
    MOVP2R(X2, &PowerPC::Write_U64);
     BR(X2);
   }
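
Note (not part of the patch): the immediates passed to UBFM/SBFM above follow the architectural aliases for LSL/LSR/ASR (immediate), and ROR (immediate) is the EXTR alias with both source operands set to the same register. Below is a minimal standalone sketch, assuming a 32-bit destination and an example shift of 3 (Wd/Wn are placeholder register names; none of this code exists in the Dolphin tree), that prints the field values the new wrappers would encode.

#include <cstdio>

int main()
{
  const int bits = 32;   // Is64Bit(Rd) ? 64 : 32
  const int shift = 3;   // example shift amount

  // LSL Wd, Wn, #3  ==  UBFM Wd, Wn, #((32 - 3) & 31), #(32 - 3 - 1)  ==  UBFM Wd, Wn, #29, #28
  std::printf("LSL #%d -> UBFM immr=%d imms=%d\n", shift, (bits - shift) & (bits - 1), bits - shift - 1);
  // LSR Wd, Wn, #3  ==  UBFM Wd, Wn, #3, #31 (zero-extending)
  std::printf("LSR #%d -> UBFM immr=%d imms=%d\n", shift, shift, bits - 1);
  // ASR Wd, Wn, #3  ==  SBFM Wd, Wn, #3, #31 (sign-extending)
  std::printf("ASR #%d -> SBFM immr=%d imms=%d\n", shift, shift, bits - 1);
  // ROR Wd, Wn, #3  ==  EXTR Wd, Wn, Wn, #3 (extract 32 bits starting at bit 3 of Wn:Wn)
  std::printf("ROR #%d -> EXTR lsb=%d\n", shift, shift);
  return 0;
}

Each alias is a single data-processing instruction with no shifted-register operand, which is what the old TODO in Arm64Emitter.h asked for; UBFX and UBFIZ used in the new rlwinmx special cases are likewise UBFM aliases, so those paths also stay at one instruction each.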