diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 141336e212..44f229f3ab 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -46,7 +46,7 @@ std::optional<std::pair<u32, bool>> IsImmArithmetic(uint64_t input)
 }
 
 // For AND/TST/ORR/EOR etc
-std::optional<std::tuple<u8, u8, u8>> IsImmLogical(u64 value, u32 width)
+LogicalImm IsImmLogical(u64 value, u32 width)
 {
   bool negate = false;
 
@@ -154,7 +154,7 @@ std::optional<std::tuple<u8, u8, u8>> IsImmLogical(u64 value, u32 width)
     // The input was zero (or all 1 bits, which will come to here too after we
     // inverted it at the start of the function), for which we just return
     // false.
-    return std::nullopt;
+    return LogicalImm();
   }
   else
   {
@@ -171,12 +171,12 @@ std::optional<std::tuple<u8, u8, u8>> IsImmLogical(u64 value, u32 width)
 
   // If the repeat period d is not a power of two, it can't be encoded.
   if (!MathUtil::IsPow2(d))
-    return std::nullopt;
+    return LogicalImm();
 
   // If the bit stretch (b - a) does not fit within the mask derived from the
   // repeat period, then fail.
   if (((b - a) & ~mask) != 0)
-    return std::nullopt;
+    return LogicalImm();
 
   // The only possible option is b - a repeated every d bits. Now we're going to
   // actually construct the valid logical immediate derived from that
@@ -204,7 +204,7 @@ std::optional<std::tuple<u8, u8, u8>> IsImmLogical(u64 value, u32 width)
 
   // The candidate pattern doesn't match our input value, so fail.
   if (value != candidate)
-    return std::nullopt;
+    return LogicalImm();
 
   // We have a match! This is a valid logical immediate, so now we have to
   // construct the bits and pieces of the instruction encoding that generates
@@ -246,11 +246,8 @@ std::optional<std::tuple<u8, u8, u8>> IsImmLogical(u64 value, u32 width)
   // 11110s 2 UInt(s)
   //
   // So we 'or' (-d << 1) with our computed s to form imms.
-  return std::tuple{
-      static_cast<u8>(out_n),
-      static_cast<u8>(((-d << 1) | (s - 1)) & 0x3f),
-      static_cast<u8>(r),
-  };
+  return LogicalImm(static_cast<u8>(r), static_cast<u8>(((-d << 1) | (s - 1)) & 0x3f),
+                    static_cast<u8>(out_n));
 }
 
 float FPImm8ToFloat(u8 bits)
@@ -780,10 +777,18 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms,
   // Use Rn to determine bitness here.
   bool b64Bit = Is64Bit(Rn);
 
+  ASSERT_MSG(DYNAREC, b64Bit || !n, "64-bit logical immediate does not fit in 32-bit register");
+
   Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) |
           (DecodeReg(Rn) << 5) | DecodeReg(Rd));
 }
 
+void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
+{
+  ASSERT_MSG(DYNAREC, imm.valid, "Invalid logical immediate");
+  EncodeLogicalImmInst(op, Rd, Rn, imm.r, imm.s, imm.n);
+}
+
 void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
                                         ARM64Reg Rn, s32 imm)
 {
@@ -1545,22 +1550,42 @@ void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
 {
   EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
 }
+void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
+{
+  EncodeLogicalImmInst(0, Rd, Rn, imm);
+}
 void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
 {
   EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
 }
+void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
+{
+  EncodeLogicalImmInst(3, Rd, Rn, imm);
+}
 void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
 {
   EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
 }
+void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
+{
+  EncodeLogicalImmInst(2, Rd, Rn, imm);
+}
 void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
 {
   EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
 }
+void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
+{
+  EncodeLogicalImmInst(1, Rd, Rn, imm);
+}
 void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)
 {
   EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, immr, imms, invert);
 }
+void ARM64XEmitter::TST(ARM64Reg Rn, LogicalImm imm)
+{
+  EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm);
+}
 
 // Add/subtract (immediate)
 void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
@@ -4129,8 +4154,7 @@ void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
   if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    AND(Rd, Rn, imm_r, imm_s, n != 0);
+    AND(Rd, Rn, result);
   }
   else
   {
@@ -4146,8 +4170,7 @@ void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
   if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    ORR(Rd, Rn, imm_r, imm_s, n != 0);
+    ORR(Rd, Rn, result);
   }
   else
   {
@@ -4163,8 +4186,7 @@ void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
   if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    EOR(Rd, Rn, imm_r, imm_s, n != 0);
+    EOR(Rd, Rn, result);
   }
   else
   {
@@ -4180,8 +4202,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
   if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    ANDS(Rd, Rn, imm_r, imm_s, n != 0);
+    ANDS(Rd, Rn, result);
   }
   else
   {
@@ -4342,10 +4363,9 @@ bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm)
 
 bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    AND(Rd, Rn, imm_r, imm_s, n != 0);
+    AND(Rd, Rn, result);
     return true;
   }
 
@@ -4354,10 +4374,9 @@ bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 
 bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    ORR(Rd, Rn, imm_r, imm_s, n != 0);
+    ORR(Rd, Rn, result);
     return true;
   }
 
@@ -4366,10 +4385,9 @@ bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 
 bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32))
   {
-    const auto& [n, imm_s, imm_r] = *result;
-    EOR(Rd, Rn, imm_r, imm_s, n != 0);
+    EOR(Rd, Rn, result);
     return true;
   }
 
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 5702bebbd9..f62b62e3ef 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -496,6 +496,19 @@ public:
   bool IsExtended() const { return m_type == TypeSpecifier::ExtendedReg; }
 };
 
+struct LogicalImm
+{
+  constexpr LogicalImm() : r(0), s(0), n(false), valid(false) {}
+  constexpr LogicalImm(u8 r_, u8 s_, bool n_) : r(r_), s(s_), n(n_), valid(true) {}
+
+  constexpr operator bool() const { return valid; }
+
+  u8 r;
+  u8 s;
+  bool n;
+  bool valid;
+};
+
 class ARM64XEmitter
 {
   friend class ARM64FloatEmitter;
@@ -531,6 +544,7 @@ private:
   void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
   void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
   void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
+  void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
   void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
                            s32 imm);
   void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
@@ -772,10 +786,15 @@ public:
 
   // Logical (immediate)
   void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
   void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
   void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
   void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
   void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void TST(ARM64Reg Rn, LogicalImm imm);
   // Add/subtract (immediate)
   void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
   void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
@@ -893,8 +912,10 @@ public:
     MOVI2R(Rd, (uintptr_t)ptr);
   }
 
-  // Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch
-  // register.
+  // Wrapper around AND x, y, imm etc.
+  // If you are sure the imm will work, no need to pass a scratch register.
+  // If the imm is constant, preferably call EncodeLogicalImm directly instead of using these
+  // functions, as this lets the computation of the imm encoding be performed during compilation.
   void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
   void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
   void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG)
@@ -903,7 +924,6 @@ public:
   }
   void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
   void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
-  void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
 
   void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
                        ARM64Reg scratch);
@@ -911,6 +931,7 @@ public:
   void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
   void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
   void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
+  void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
 
   bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
   bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
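
Not part of the patch: a sketch of how a call site might use the new LogicalImm overloads next to the existing *I2R wrappers. The ExampleCallSite function, the emitter reference, the registers, and the mask constants are made up for illustration; the ANDI2R/TryANDI2R signatures, the AND(Rd, Rn, LogicalImm) overload, and the LogicalImm constructor are the ones added or kept by this diff, and the hand-written encoding assumes 0x00FF00FF is eight set bits repeating every 16 bits with no rotation.

// Illustrative sketch only -- not part of this patch.
#include "Common/Arm64Emitter.h"

using namespace Arm64Gen;

void ExampleCallSite(ARM64XEmitter& emit)  // hypothetical caller inside a JIT block
{
  // Runtime path: the wrapper checks whether the immediate encodes and, if it
  // does not, falls back to MOVI2R into the scratch register plus a register AND.
  emit.ANDI2R(ARM64Reg::W0, ARM64Reg::W1, 0x12345678, ARM64Reg::W30);

  // TryANDI2R reports failure instead of requiring a scratch register.
  if (!emit.TryANDI2R(ARM64Reg::W0, ARM64Reg::W1, 0x12345678))
  {
    // caller picks its own fallback here
  }

  // Constant path, per the new header comment: 0x00FF00FF is eight set bits
  // repeating every 16 bits with no rotation, i.e. r = 0, s = 0b100111, n = 0,
  // so the encoding can be written down once instead of searched for at runtime.
  constexpr LogicalImm kByteMask(/*r=*/0, /*s=*/0b100111, /*n=*/false);
  emit.AND(ARM64Reg::W0, ARM64Reg::W1, kByteMask);
}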