Arm64Emitter: Use ORR in MOVI2R
This commit is contained in:
parent
0d5ed06daf
commit
9ad4f724e4
|
@ -2040,6 +2040,7 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
|
||||||
MOVN,
|
MOVN,
|
||||||
ADR,
|
ADR,
|
||||||
ADRP,
|
ADRP,
|
||||||
|
ORR,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Part
|
struct Part
|
||||||
|
@ -2055,6 +2056,12 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
|
||||||
|
|
||||||
SmallVector<Part, max_parts> best_parts;
|
SmallVector<Part, max_parts> best_parts;
|
||||||
Approach best_approach;
|
Approach best_approach;
|
||||||
|
u64 best_base;
|
||||||
|
|
||||||
|
const auto instructions_required = [](const SmallVector<Part, max_parts>& parts,
|
||||||
|
Approach approach) {
|
||||||
|
return parts.size() + (approach > Approach::MOVN);
|
||||||
|
};
|
||||||
|
|
||||||
const auto try_base = [&](T base, Approach approach, bool first_time) {
|
const auto try_base = [&](T base, Approach approach, bool first_time) {
|
||||||
SmallVector<Part, max_parts> parts;
|
SmallVector<Part, max_parts> parts;
|
||||||
|
@ -2068,34 +2075,54 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
|
||||||
parts.emplace_back(imm_shifted, static_cast<ShiftAmount>(i));
|
parts.emplace_back(imm_shifted, static_cast<ShiftAmount>(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (first_time || parts.size() < best_parts.size())
|
if (first_time ||
|
||||||
|
instructions_required(parts, approach) < instructions_required(best_parts, best_approach))
|
||||||
{
|
{
|
||||||
best_parts = std::move(parts);
|
best_parts = std::move(parts);
|
||||||
best_approach = approach;
|
best_approach = approach;
|
||||||
|
best_base = base;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Try MOVZ/MOVN
|
||||||
|
try_base(T(0), Approach::MOVZ, true);
|
||||||
|
try_base(~T(0), Approach::MOVN, false);
|
||||||
|
|
||||||
|
// Try PC-relative approaches
|
||||||
const auto sext_21_bit = [](u64 x) {
|
const auto sext_21_bit = [](u64 x) {
|
||||||
return static_cast<s64>((x & 0x1FFFFF) | (x & 0x100000 ? ~0x1FFFFF : 0));
|
return static_cast<s64>((x & 0x1FFFFF) | (x & 0x100000 ? ~0x1FFFFF : 0));
|
||||||
};
|
};
|
||||||
|
|
||||||
const u64 pc = reinterpret_cast<u64>(GetCodePtr());
|
const u64 pc = reinterpret_cast<u64>(GetCodePtr());
|
||||||
const s64 adrp_offset = sext_21_bit((imm >> 12) - (pc >> 12)) << 12;
|
const s64 adrp_offset = sext_21_bit((imm >> 12) - (pc >> 12)) << 12;
|
||||||
const s64 adr_offset = sext_21_bit(imm - pc);
|
const s64 adr_offset = sext_21_bit(imm - pc);
|
||||||
const u64 adrp_base = (pc & ~0xFFF) + adrp_offset;
|
const u64 adrp_base = (pc & ~0xFFF) + adrp_offset;
|
||||||
const u64 adr_base = pc + adr_offset;
|
const u64 adr_base = pc + adr_offset;
|
||||||
|
|
||||||
// First: Try approaches for which instruction_count = max(parts.size(), 1)
|
|
||||||
try_base(T(0), Approach::MOVZ, true);
|
|
||||||
try_base(~T(0), Approach::MOVN, false);
|
|
||||||
|
|
||||||
// Second: Try approaches for which instruction_count = parts.size() + 1
|
|
||||||
if constexpr (sizeof(T) == 8)
|
if constexpr (sizeof(T) == 8)
|
||||||
{
|
{
|
||||||
try_base(adrp_base, Approach::ADRP, false);
|
try_base(adrp_base, Approach::ADRP, false);
|
||||||
try_base(adr_base, Approach::ADR, false);
|
try_base(adr_base, Approach::ADR, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try ORR (or skip it if we already have a 1-instruction encoding - these tests are non-trivial)
|
||||||
|
if (instructions_required(best_parts, best_approach) > 1)
|
||||||
|
{
|
||||||
|
if constexpr (sizeof(T) == 8)
|
||||||
|
{
|
||||||
|
for (u64 orr_imm : {(imm << 32) | (imm & 0x0000'0000'FFFF'FFFF),
|
||||||
|
(imm & 0xFFFF'FFFF'0000'0000) | (imm >> 32),
|
||||||
|
(imm << 48) | (imm & 0x0000'FFFF'FFFF'0000) | (imm >> 48)})
|
||||||
|
{
|
||||||
|
if (IsImmLogical(orr_imm, 64))
|
||||||
|
try_base(orr_imm, Approach::ORR, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (IsImmLogical(imm, 32))
|
||||||
|
try_base(imm, Approach::ORR, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
size_t parts_uploaded = 0;
|
size_t parts_uploaded = 0;
|
||||||
|
|
||||||
// To kill any dependencies, we start with an instruction that overwrites the entire register
|
// To kill any dependencies, we start with an instruction that overwrites the entire register
|
||||||
|
@ -2124,6 +2151,12 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
|
||||||
case Approach::ADRP:
|
case Approach::ADRP:
|
||||||
ADRP(Rd, adrp_offset);
|
ADRP(Rd, adrp_offset);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case Approach::ORR:
|
||||||
|
constexpr ARM64Reg zero_reg = sizeof(T) == 8 ? ZR : WZR;
|
||||||
|
const bool success = TryORRI2R(Rd, zero_reg, best_base);
|
||||||
|
ASSERT(success);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// And then we use MOVK for the remaining parts
|
// And then we use MOVK for the remaining parts
|
||||||
|
@ -4330,7 +4363,7 @@ void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||||
ADDI2R_internal(Is64Bit(Rn) ? ZR : WZR, Rn, imm, true, true, scratch);
|
ADDI2R_internal(Is64Bit(Rn) ? ZR : WZR, Rn, imm, true, true, scratch);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmArithmetic(imm))
|
if (const auto result = IsImmArithmetic(imm))
|
||||||
{
|
{
|
||||||
|
@ -4342,7 +4375,7 @@ bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmArithmetic(imm))
|
if (const auto result = IsImmArithmetic(imm))
|
||||||
{
|
{
|
||||||
|
@ -4354,7 +4387,7 @@ bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm)
|
bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmArithmetic(imm))
|
if (const auto result = IsImmArithmetic(imm))
|
||||||
{
|
{
|
||||||
|
@ -4366,9 +4399,9 @@ bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmLogical(imm, 32))
|
if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
|
||||||
{
|
{
|
||||||
const auto& [n, imm_s, imm_r] = *result;
|
const auto& [n, imm_s, imm_r] = *result;
|
||||||
AND(Rd, Rn, imm_r, imm_s, n != 0);
|
AND(Rd, Rn, imm_r, imm_s, n != 0);
|
||||||
|
@ -4377,9 +4410,10 @@ bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
|
||||||
|
bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmLogical(imm, 32))
|
if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
|
||||||
{
|
{
|
||||||
const auto& [n, imm_s, imm_r] = *result;
|
const auto& [n, imm_s, imm_r] = *result;
|
||||||
ORR(Rd, Rn, imm_r, imm_s, n != 0);
|
ORR(Rd, Rn, imm_r, imm_s, n != 0);
|
||||||
|
@ -4388,9 +4422,10 @@ bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
|
|
||||||
|
bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
|
||||||
{
|
{
|
||||||
if (const auto result = IsImmLogical(imm, 32))
|
if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
|
||||||
{
|
{
|
||||||
const auto& [n, imm_s, imm_r] = *result;
|
const auto& [n, imm_s, imm_r] = *result;
|
||||||
EOR(Rd, Rn, imm_r, imm_s, n != 0);
|
EOR(Rd, Rn, imm_r, imm_s, n != 0);
|
||||||
|
|
|
@ -896,13 +896,13 @@ public:
|
||||||
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
|
||||||
bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
|
||||||
bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
|
||||||
bool TryCMPI2R(ARM64Reg Rn, u32 imm);
|
bool TryCMPI2R(ARM64Reg Rn, u64 imm);
|
||||||
|
|
||||||
bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
|
||||||
bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
|
||||||
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
|
||||||
|
|
||||||
// ABI related
|
// ABI related
|
||||||
void ABI_PushRegisters(BitSet32 registers);
|
void ABI_PushRegisters(BitSet32 registers);
|
||||||
|
|
Loading…
Reference in New Issue