diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 98a857955a..3b5a662d70 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -4149,141 +4149,127 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
   }
 }
 
+// Emits Rd = Rn + imm, or Rd = Rn - imm when `negative` is set; the flag-setting ADDS/SUBS forms
+// are used when `flags` is set. A single immediate-form instruction is preferred, trying both imm
+// and its two's complement (with the operation inverted). `scratch` may be INVALID_REG, in which
+// case an immediate that fits none of the scratch-free sequences trips the assertion below.
+void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
+                                    ARM64Reg scratch)
+{
+  auto addi = [this](ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, bool flags) {
+    switch ((negative << 1) | flags)
+    {
+    case 0:
+      ADD(Rd, Rn, imm, shift);
+      break;
+    case 1:
+      ADDS(Rd, Rn, imm, shift);
+      break;
+    case 2:
+      SUB(Rd, Rn, imm, shift);
+      break;
+    case 3:
+      SUBS(Rd, Rn, imm, shift);
+      break;
+    }
+  };
+
+  // imm_neg is truncated to 32 bits for a 32-bit Rd; truncate imm the same way so that the range
+  // checks below are not defeated by stray upper bits.
+  if (!Is64Bit(Rd))
+    imm &= 0xFFFFFFFFuLL;
+
+  bool has_scratch = scratch != INVALID_REG;
+  u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
+  bool neg_neg = !negative;
+
+  // Fast paths, aarch64 immediate instructions
+  // Try them all first
+  if (imm <= 0xFFF)
+  {
+    addi(Rd, Rn, imm, false, negative, flags);
+    return;
+  }
+  if (imm <= 0xFFFFFF && (imm & 0xFFF) == 0)
+  {
+    addi(Rd, Rn, imm >> 12, true, negative, flags);
+    return;
+  }
+  if (imm_neg <= 0xFFF)
+  {
+    addi(Rd, Rn, imm_neg, false, neg_neg, flags);
+    return;
+  }
+  if (imm_neg <= 0xFFFFFF && (imm_neg & 0xFFF) == 0)
+  {
+    addi(Rd, Rn, imm_neg >> 12, true, neg_neg, flags);
+    return;
+  }
+
+  // ADD+ADD is slower than MOVK+ADD, but inplace.
+  // But it supports a few more bits, so use it to avoid MOVK+MOVK+ADD.
+  // As this splits the addition in two parts, this must not be done on setting flags.
+  if (!flags && (imm >= 0x10000u || !has_scratch) && imm < 0x1000000u)
+  {
+    addi(Rd, Rn, imm & 0xFFF, false, negative, false);
+    addi(Rd, Rd, imm >> 12, true, negative, false);
+    return;
+  }
+  if (!flags && (imm_neg >= 0x10000u || !has_scratch) && imm_neg < 0x1000000u)
+  {
+    addi(Rd, Rn, imm_neg & 0xFFF, false, neg_neg, false);
+    addi(Rd, Rd, imm_neg >> 12, true, neg_neg, false);
+    return;
+  }
+
+  _assert_msg_(DYNA_REC, has_scratch,
+               "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch",
+               (u32)imm);
+
+  // MOVI2R2 is handed both candidates; a true return value inverts the operation (presumably it
+  // reports that imm_neg rather than imm was loaded into scratch -- confirm against MOVI2R2).
+  negative ^= MOVI2R2(scratch, imm, imm_neg);
+  switch ((negative << 1) | flags)
+  {
+  case 0:
+    ADD(Rd, Rn, scratch);
+    break;
+  case 1:
+    ADDS(Rd, Rn, scratch);
+    break;
+  case 2:
+    SUB(Rd, Rn, scratch);
+    break;
+  case 3:
+    SUBS(Rd, Rn, scratch);
+    break;
+  }
+}
+
 void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
-  u32 val;
-  bool shift;
-  u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
-  if (IsImmArithmetic(imm, &val, &shift))
-  {
-    ADD(Rd, Rn, val, shift);
-  }
-  else if (IsImmArithmetic(imm_neg, &val, &shift))
-  {
-    SUB(Rd, Rn, val, shift);
-  }
-  else if ((imm >= 0x10000u || scratch == INVALID_REG) && imm < 0x1000000u)
-  {
-    ADD(Rd, Rn, imm & 0xFFF, false);
-    ADD(Rd, Rd, imm >> 12, true);
-  }
-  else if ((imm_neg >= 0x10000u || scratch == INVALID_REG) && imm_neg < 0x1000000u)
-  {
-    SUB(Rd, Rn, imm_neg & 0xFFF, false);
-    SUB(Rd, Rd, imm_neg >> 12, true);
-  }
-  else
-  {
-    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
-                 "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch",
-                 (u32)imm);
-    if (MOVI2R2(scratch, imm, imm_neg))
-      SUB(Rd, Rn, scratch);
-    else
-      ADD(Rd, Rn, scratch);
-  }
+  ADDI2R_internal(Rd, Rn, imm, false, false, scratch);
 }
 
 void ARM64XEmitter::ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
-  u32 val;
-  bool shift;
-  u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
-  if (IsImmArithmetic(imm, &val, &shift))
-  {
-    ADDS(Rd, Rn, val, shift);
-  }
-  else if (IsImmArithmetic(imm_neg, &val, &shift))
-  {
-    SUBS(Rd, Rn, val, shift);
-  }
-  else
-  {
-    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
-                 "ADDSI2R - failed to construct arithmetic immediate value from %08x, need scratch",
-                 (u32)imm);
-    if (MOVI2R2(scratch, imm, imm_neg))
-      SUBS(Rd, Rn, scratch);
-    else
-      ADDS(Rd, Rn, scratch);
-  }
+  ADDI2R_internal(Rd, Rn, imm, false, true, scratch);
 }
 
 void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
-  u32 val;
-  bool shift;
-  u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
-  if (IsImmArithmetic(imm, &val, &shift))
-  {
-    SUB(Rd, Rn, val, shift);
-  }
-  else if (IsImmArithmetic(imm_neg, &val, &shift))
-  {
-    ADD(Rd, Rn, val, shift);
-  }
-  else if ((imm >= 0x10000u || scratch == INVALID_REG) && imm < 0x1000000u)
-  {
-    SUB(Rd, Rn, imm & 0xFFF, false);
-    SUB(Rd, Rd, imm >> 12, true);
-  }
-  else if ((imm_neg >= 0x10000u || scratch == INVALID_REG) && imm_neg < 0x1000000u)
-  {
-    ADD(Rd, Rn, imm_neg & 0xFFF, false);
-    ADD(Rd, Rd, imm_neg >> 12, true);
-  }
-  else
-  {
-    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
-                 "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch",
-                 (u32)imm);
-    if (MOVI2R2(scratch, imm, imm_neg))
-      ADD(Rd, Rn, scratch);
-    else
-      SUB(Rd, Rn, scratch);
-  }
+  ADDI2R_internal(Rd, Rn, imm, true, false, scratch);
 }
 
 void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
-  u32 val;
-  bool shift;
-  u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
-  if (IsImmArithmetic(imm, &val, &shift))
-  {
-    SUBS(Rd, Rn, val, shift);
-  }
-  else if (IsImmArithmetic(imm_neg, &val, &shift))
-  {
-    ADDS(Rd, Rn, val, shift);
-  }
-  else
-  {
-    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
-                 "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
-    if (MOVI2R2(scratch, imm, imm_neg))
-      ADDS(Rd, Rn, scratch);
-    else
-      SUBS(Rd, Rn, scratch);
-  }
+  ADDI2R_internal(Rd, Rn, imm, true, true, scratch);
 }
 
 void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
-  u32 val;
-  bool shift;
-  if (IsImmArithmetic(imm, &val, &shift))
-  {
-    CMP(Rn, val, shift);
-  }
-  else
-  {
-    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
-                 "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch",
-                 (u32)imm);
-    MOVI2R(scratch, imm);
-    CMP(Rn, scratch);
-  }
+  // CMP is SUBS with the zero register as destination.
+  ADDI2R_internal(Is64Bit(Rn) ? ZR : WZR, Rn, imm, true, true, scratch);
 }
 
 bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index a23d5a4349..ea69c371c3 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -855,6 +855,9 @@ public:
   void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
   void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
 
+  // Common backend of ADDI2R/ADDSI2R/SUBI2R/SUBSI2R/CMPI2R; callers should use those wrappers.
+  void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
+                       ARM64Reg scratch);
   void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
   void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
   void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);