// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "macro-assembler-aarch64.h"
|
|
|
|
namespace vixl {
|
|
namespace aarch64 {
|
|
|
|
void MacroAssembler::AddSubHelper(AddSubHelperOption option,
|
|
const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(imm.FitsInLane(zd));
|
|
|
|
// Simple, encodable cases.
|
|
if (TrySingleAddSub(option, zd, zn, imm)) return;
|
|
|
|
VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
|
|
bool add_imm = (option == kAddImmediate);
|
|
|
|
// Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
|
|
// instruction. Also interpret the immediate as signed, so we can convert
|
|
// Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
|
|
IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
|
|
if (signed_imm.IsNegative()) {
|
|
AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
|
|
IntegerOperand n_imm(signed_imm.GetMagnitude());
|
|
// IntegerOperand can represent -INT64_MIN, so this is always safe.
|
|
VIXL_ASSERT(n_imm.IsPositiveOrZero());
|
|
if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
|
|
}
|
|
|
|
// Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
|
|
Dup(scratch, imm);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
if (add_imm) {
|
|
add(zd, zn, scratch);
|
|
} else {
|
|
sub(zd, zn, scratch);
|
|
}
|
|
}
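
// Illustrative expansions of the helper above (a sketch; the exact sequence
// depends on which immediate encodings succeed):
//   Add(z0.VnH(), z0.VnH(), 0xffff);   // 0xffff is -1 for H lanes, so this can
//                                      // be emitted as "sub z0.h, z0.h, #1".
//   Add(z0.VnS(), z1.VnS(), 0x12345);  // Not encodable in one instruction;
//                                      // falls back to a Dup into a scratch
//                                      // Z register followed by "add".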
|
|
|
|
bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
|
|
const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(imm.FitsInLane(zd));
|
|
|
|
int imm8;
|
|
int shift = -1;
|
|
if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
|
|
imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
|
|
MovprfxHelperScope guard(this, zd, zn);
|
|
switch (option) {
|
|
case kAddImmediate:
|
|
add(zd, zd, imm8, shift);
|
|
return true;
|
|
case kSubImmediate:
|
|
sub(zd, zd, imm8, shift);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
|
|
SVEArithPredicatedFn reg_macro,
|
|
const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm,
|
|
bool is_signed) {
|
|
if (is_signed) {
|
|
// E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
|
|
if (imm.IsInt8()) {
|
|
MovprfxHelperScope guard(this, zd, zn);
|
|
(this->*imm_fn)(zd, zd, imm.AsInt8());
|
|
return;
|
|
}
|
|
} else {
|
|
// E.g. UMIN_z_zi, UMAX_z_zi
|
|
if (imm.IsUint8()) {
|
|
MovprfxHelperScope guard(this, zd, zn);
|
|
(this->*imm_fn)(zd, zd, imm.AsUint8());
|
|
return;
|
|
}
|
|
}
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
PRegister pg = temps.AcquireGoverningP();
|
|
Ptrue(pg.WithSameLaneSizeAs(zd));
|
|
|
|
// Try to re-use zd if we can, so we can avoid a movprfx.
|
|
ZRegister scratch =
|
|
zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
|
|
: zd;
|
|
Dup(scratch, imm);
|
|
|
|
// The vector-form macro for commutative operations will swap the arguments,
// if necessary, to avoid a movprfx.
|
|
(this->*reg_macro)(zd, pg.Merging(), zn, scratch);
|
|
}
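
// Approximate expansions of IntWideImmHelper for the wrappers below:
//   Mul(z0.VnB(), z1.VnB(), -2);    // Fits in int8:
//                                   // "movprfx z0, z1; mul z0.b, z0.b, #-2".
//   Umin(z0.VnS(), z1.VnS(), 300);  // Too wide for the immediate form; emits
//                                   // ptrue, a Dup into a scratch Z register
//                                   // and the predicated vector form instead.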
|
|
|
|
void MacroAssembler::Mul(const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
IntArithImmFn imm_fn = &Assembler::mul;
|
|
SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
|
|
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
|
|
}
|
|
|
|
void MacroAssembler::Smin(const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInSignedLane(zd));
|
|
IntArithImmFn imm_fn = &Assembler::smin;
|
|
SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
|
|
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
|
|
}
|
|
|
|
void MacroAssembler::Smax(const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInSignedLane(zd));
|
|
IntArithImmFn imm_fn = &Assembler::smax;
|
|
SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
|
|
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
|
|
}
|
|
|
|
void MacroAssembler::Umax(const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
|
|
IntArithImmFn imm_fn = &Assembler::umax;
|
|
SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
|
|
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
|
|
}
|
|
|
|
void MacroAssembler::Umin(const ZRegister& zd,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
|
|
IntArithImmFn imm_fn = &Assembler::umin;
|
|
SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
|
|
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
|
|
}
|
|
|
|
void MacroAssembler::Addpl(const Register& xd,
|
|
const Register& xn,
|
|
int64_t multiplier) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
|
|
// This macro relies on `Rdvl` to handle some out-of-range cases. Check that
|
|
// `VL * multiplier` cannot overflow, for any possible value of VL.
|
|
VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
|
|
VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
|
|
|
|
if (xd.IsZero()) return;
|
|
if (xn.IsZero() && xd.IsSP()) {
|
|
// TODO: This operation doesn't make much sense, but we could support it
|
|
// with a scratch register if necessary.
|
|
VIXL_UNIMPLEMENTED();
|
|
}
|
|
|
|
// Handling xzr requires an extra move, so defer it until later so we can try
|
|
// to use `rdvl` instead (via `Addvl`).
|
|
if (IsInt6(multiplier) && !xn.IsZero()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
addpl(xd, xn, static_cast<int>(multiplier));
|
|
return;
|
|
}
|
|
|
|
// If `multiplier` is a multiple of 8, we can use `Addvl` instead.
|
|
if ((multiplier % kZRegBitsPerPRegBit) == 0) {
|
|
Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
|
|
return;
|
|
}
|
|
|
|
if (IsInt6(multiplier)) {
|
|
VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`.
|
|
// There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
|
|
// materialise a zero.
|
|
MacroEmissionCheckScope guard(this);
|
|
movz(xd, 0);
|
|
addpl(xd, xd, static_cast<int>(multiplier));
|
|
return;
|
|
}
|
|
|
|
// TODO: Some probable cases result in rather long sequences. For example,
|
|
// `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
|
|
// outside the encodable range. We should look for ways to cover such cases
|
|
// without drastically increasing the complexity of this logic.
|
|
|
|
// For other cases, calculate xn + (PL * multiplier) using discrete
|
|
// instructions. This requires two scratch registers in the general case, so
|
|
// try to re-use the destination as a scratch register.
|
|
UseScratchRegisterScope temps(this);
|
|
temps.Include(xd);
|
|
temps.Exclude(xn);
|
|
|
|
Register scratch = temps.AcquireX();
|
|
// There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
// multiplier because (we already know) it isn't a multiple of 8.
|
|
Rdvl(scratch, multiplier);
|
|
|
|
MacroEmissionCheckScope guard(this);
|
|
if (xn.IsZero()) {
|
|
asr(xd, scratch, kZRegBitsPerPRegBitLog2);
|
|
} else if (xd.IsSP() || xn.IsSP()) {
|
|
// TODO: MacroAssembler::Add should be able to handle this.
|
|
asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
|
|
add(xd, xn, scratch);
|
|
} else {
|
|
add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
|
|
}
|
|
}
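
// Addpl examples (approximate; see the cases handled above):
//   Addpl(x0, x1, 3);    // Directly encodable: "addpl x0, x1, #3".
//   Addpl(x0, x1, 40);   // A multiple of 8, so it becomes Addvl(x0, x1, 5).
//   Addpl(x0, xzr, 3);   // addpl cannot take xzr: "movz x0, #0; addpl x0, x0, #3".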
|
|
|
|
void MacroAssembler::Addvl(const Register& xd,
|
|
const Register& xn,
|
|
int64_t multiplier) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(xd.IsX());
|
|
VIXL_ASSERT(xn.IsX());
|
|
|
|
// Check that `VL * multiplier` cannot overflow, for any possible value of VL.
|
|
VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
|
|
VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
|
|
|
|
if (xd.IsZero()) return;
|
|
if (xn.IsZero() && xd.IsSP()) {
|
|
// TODO: This operation doesn't make much sense, but we could support it
|
|
// with a scratch register if necessary. `rdvl` cannot write into `sp`.
|
|
VIXL_UNIMPLEMENTED();
|
|
}
|
|
|
|
if (IsInt6(multiplier)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
if (xn.IsZero()) {
|
|
rdvl(xd, static_cast<int>(multiplier));
|
|
} else {
|
|
addvl(xd, xn, static_cast<int>(multiplier));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// TODO: Some probable cases result in rather long sequences. For example,
|
|
// `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
|
|
// outside the encodable range. We should look for ways to cover such cases
|
|
// without drastically increasing the complexity of this logic.
|
|
|
|
// For other cases, calculate xn + (VL * multiplier) using discrete
|
|
// instructions. This requires two scratch registers in the general case, so
|
|
// we try to re-use the destination as a scratch register.
|
|
UseScratchRegisterScope temps(this);
|
|
temps.Include(xd);
|
|
temps.Exclude(xn);
|
|
|
|
Register a = temps.AcquireX();
|
|
Mov(a, multiplier);
|
|
|
|
MacroEmissionCheckScope guard(this);
|
|
Register b = temps.AcquireX();
|
|
rdvl(b, 1);
|
|
if (xn.IsZero()) {
|
|
mul(xd, a, b);
|
|
} else if (xd.IsSP() || xn.IsSP()) {
|
|
mul(a, a, b);
|
|
add(xd, xn, a);
|
|
} else {
|
|
madd(xd, a, b, xn);
|
|
}
|
|
}
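
// Addvl examples (approximate; large multipliers take the scratch-register path):
//   Addvl(x0, x1, 31);    // "addvl x0, x1, #31".
//   Addvl(x0, xzr, 4);    // "rdvl x0, #4".
//   Addvl(x0, x1, 1000);  // Out of range for addvl: "mov <tmp1>, #1000;
//                         //  rdvl <tmp2>, #1; madd x0, <tmp1>, <tmp2>, x1".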
|
|
|
|
void MacroAssembler::CalculateSVEAddress(const Register& xd,
|
|
const SVEMemOperand& addr,
|
|
int vl_divisor_log2) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(!addr.IsScatterGather());
|
|
VIXL_ASSERT(xd.IsX());
|
|
|
|
// The lower bound is where a whole Z register is accessed.
|
|
VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
|
|
// The upper bound is for P register accesses, and for instructions like
|
|
// "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
|
|
VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));
|
|
|
|
SVEOffsetModifier mod = addr.GetOffsetModifier();
|
|
Register base = addr.GetScalarBase();
|
|
|
|
if (addr.IsEquivalentToScalar()) {
|
|
// For example:
|
|
// [x0]
|
|
// [x0, #0]
|
|
// [x0, xzr, LSL 2]
|
|
Mov(xd, base);
|
|
} else if (addr.IsScalarPlusImmediate()) {
|
|
// For example:
|
|
// [x0, #42]
|
|
// [x0, #42, MUL VL]
|
|
int64_t offset = addr.GetImmediateOffset();
|
|
VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar.
|
|
if (addr.IsMulVl()) {
|
|
int vl_divisor = 1 << vl_divisor_log2;
|
|
// For all possible values of vl_divisor, we can simply use `Addpl`. This
|
|
// will select `addvl` if necessary.
|
|
VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
|
|
Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
|
|
} else {
|
|
// IsScalarPlusImmediate() ensures that no other modifiers can occur.
|
|
VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
|
|
Add(xd, base, offset);
|
|
}
|
|
} else if (addr.IsScalarPlusScalar()) {
|
|
// For example:
|
|
// [x0, x1]
|
|
// [x0, x1, LSL #4]
|
|
Register offset = addr.GetScalarOffset();
|
|
VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar.
|
|
if (mod == SVE_LSL) {
|
|
Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
|
|
} else {
|
|
// IsScalarPlusScalar() ensures that no other modifiers can occur.
|
|
VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
|
|
Add(xd, base, offset);
|
|
}
|
|
} else {
|
|
// All other forms are scatter-gather addresses, which cannot be evaluated
|
|
// into an X register.
|
|
VIXL_UNREACHABLE();
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Cpy(const ZRegister& zd,
|
|
const PRegister& pg,
|
|
IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInLane(zd));
|
|
int imm8;
|
|
int shift;
|
|
if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
|
|
imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
cpy(zd, pg, imm8, shift);
|
|
return;
|
|
}
|
|
|
|
// The fallbacks rely on `cpy` variants that only support merging predication.
|
|
// If zeroing predication was requested, zero the destination first.
|
|
if (pg.IsZeroing()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
dup(zd, 0);
|
|
}
|
|
PRegisterM pg_m = pg.Merging();
|
|
|
|
// Try to encode the immediate using fcpy.
|
|
VIXL_ASSERT(imm.FitsInLane(zd));
|
|
if (zd.GetLaneSizeInBits() >= kHRegSize) {
|
|
double fp_imm = 0.0;
|
|
switch (zd.GetLaneSizeInBits()) {
|
|
case kHRegSize:
|
|
fp_imm =
|
|
FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
|
|
break;
|
|
case kSRegSize:
|
|
fp_imm = RawbitsToFloat(imm.AsUint32());
|
|
break;
|
|
case kDRegSize:
|
|
fp_imm = RawbitsToDouble(imm.AsUint64());
|
|
break;
|
|
default:
|
|
VIXL_UNREACHABLE();
|
|
break;
|
|
}
|
|
// IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
|
|
// we can use IsImmFP64 for all lane sizes.
|
|
if (IsImmFP64(fp_imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fcpy(zd, pg_m, fp_imm);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Fall back to using a scratch register.
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireRegisterToHoldLane(zd);
|
|
Mov(scratch, imm);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
cpy(zd, pg_m, scratch);
|
|
}
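
// Cpy immediate examples (a sketch):
//   Cpy(z0.VnB(), p0.Merging(), 42);                  // "cpy z0.b, p0/m, #42".
//   Cpy(z0.VnD(), p0.Merging(), 0x3ff0000000000000);  // The bit pattern of 1.0:
//                                                     // "fcpy z0.d, p0/m, #1.0".
//   Cpy(z0.VnS(), p0.Merging(), 0x12345678);          // Needs a scratch register:
//                                                     // mov into a W register,
//                                                     // then "cpy z0.s, p0/m, <wn>".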
|
|
|
|
// TODO: We implement Fcpy (amongst other things) for all FP types because it
|
|
// allows us to preserve user-specified NaNs. We should come up with some
|
|
// FPImmediate type to abstract this, and avoid all the duplication below (and
|
|
// elsewhere).
|
|
|
|
void MacroAssembler::Fcpy(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
double imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(pg.IsMerging());
|
|
|
|
if (IsImmFP64(imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fcpy(zd, pg, imm);
|
|
return;
|
|
}
|
|
|
|
// As a fall-back, cast the immediate to the required lane size, and try to
|
|
// encode the bit pattern using `Cpy`.
|
|
Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
|
|
}
|
|
|
|
void MacroAssembler::Fcpy(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
float imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(pg.IsMerging());
|
|
|
|
if (IsImmFP32(imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fcpy(zd, pg, imm);
|
|
return;
|
|
}
|
|
|
|
// As a fall-back, cast the immediate to the required lane size, and try to
|
|
// encode the bit pattern using `Cpy`.
|
|
Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
|
|
}
|
|
|
|
void MacroAssembler::Fcpy(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
Float16 imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(pg.IsMerging());
|
|
|
|
if (IsImmFP16(imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fcpy(zd, pg, imm);
|
|
return;
|
|
}
|
|
|
|
// As a fall-back, cast the immediate to the required lane size, and try to
|
|
// encode the bit pattern using `Cpy`.
|
|
Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
|
|
}
|
|
|
|
void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInLane(zd));
|
|
unsigned lane_size = zd.GetLaneSizeInBits();
|
|
int imm8;
|
|
int shift;
|
|
if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
|
|
imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
dup(zd, imm8, shift);
|
|
} else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
dupm(zd, imm.AsUintN(lane_size));
|
|
} else {
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireRegisterToHoldLane(zd);
|
|
Mov(scratch, imm);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
dup(zd, scratch);
|
|
}
|
|
}
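
// Dup immediate examples (approximate):
//   Dup(z0.VnH(), 0x2a00);       // imm8, LSL #8 form: "dup z0.h, #42, lsl #8".
//   Dup(z0.VnS(), 0x00ff00ff);   // A logical immediate: "dupm z0.s, #0xff00ff".
//   Dup(z0.VnD(), 0x123456789);  // Neither form fits; mov into an X register,
//                                // then "dup z0.d, <xn>".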
|
|
|
|
void MacroAssembler::NoncommutativeArithmeticHelper(
|
|
const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
SVEArithPredicatedFn fn,
|
|
SVEArithPredicatedFn rev_fn) {
|
|
if (zd.Aliases(zn)) {
|
|
// E.g. zd = zd / zm
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zd, pg, zn, zm);
|
|
} else if (zd.Aliases(zm)) {
|
|
// E.g. zd = zn / zd
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*rev_fn)(zd, pg, zm, zn);
|
|
} else {
|
|
// E.g. zd = zn / zm
|
|
MovprfxHelperScope guard(this, zd, pg, zn);
|
|
(this->*fn)(zd, pg, zd, zm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::FPCommutativeArithmeticHelper(
|
|
const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
SVEArithPredicatedFn fn,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
ResolveFPNaNPropagationOption(&nan_option);
|
|
|
|
if (zd.Aliases(zn)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zd, pg, zd, zm);
|
|
} else if (zd.Aliases(zm)) {
|
|
switch (nan_option) {
|
|
case FastNaNPropagation: {
|
|
// Swap the arguments.
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zd, pg, zd, zn);
|
|
return;
|
|
}
|
|
case StrictNaNPropagation: {
|
|
UseScratchRegisterScope temps(this);
|
|
// Use a scratch register to keep the argument order exactly as
|
|
// specified.
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
|
|
{
|
|
MovprfxHelperScope guard(this, scratch, pg, zn);
|
|
(this->*fn)(scratch, pg, scratch, zm);
|
|
}
|
|
Mov(zd, scratch);
|
|
return;
|
|
}
|
|
case NoFPMacroNaNPropagationSelected:
|
|
VIXL_UNREACHABLE();
|
|
return;
|
|
}
|
|
} else {
|
|
MovprfxHelperScope guard(this, zd, pg, zn);
|
|
(this->*fn)(zd, pg, zd, zm);
|
|
}
|
|
}
|
|
|
|
// Instructions of the form "inst zda, zn, zm, #num", which are non-commutative
// and for which no reversed form is provided.
|
|
#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
|
|
V(Cmla, cmla) \
|
|
V(Sqrdcmlah, sqrdcmlah)
|
|
|
|
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
|
|
void MacroAssembler::MASMFN(const ZRegister& zd, \
|
|
const ZRegister& za, \
|
|
const ZRegister& zn, \
|
|
const ZRegister& zm, \
|
|
int imm) { \
|
|
if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
|
|
UseScratchRegisterScope temps(this); \
|
|
VIXL_ASSERT(AreSameLaneSize(zn, zm)); \
|
|
ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \
|
|
Mov(ztmp, zd.Aliases(zn) ? zn : zm); \
|
|
MovprfxHelperScope guard(this, zd, za); \
|
|
ASMFN(zd, \
|
|
(zd.Aliases(zn) ? ztmp : zn), \
|
|
(zd.Aliases(zm) ? ztmp : zm), \
|
|
imm); \
|
|
} else { \
|
|
MovprfxHelperScope guard(this, zd, za); \
|
|
ASMFN(zd, zn, zm, imm); \
|
|
} \
|
|
}
|
|
VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
#undef VIXL_DEFINE_MASM_FUNC
|
|
|
|
// Instructions of the form "inst zda, zn, zm, #num, #num", which are
// non-commutative and for which no reversed form is provided.
|
|
#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
|
|
V(Cmla, cmla) \
|
|
V(Sqrdcmlah, sqrdcmlah)
|
|
|
|
// This doesn't handle zm when it is outside the range that can be encoded in the
// instruction. The range depends on the element size: z0-z7 for H, z0-z15 for S.
|
|
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
|
|
void MacroAssembler::MASMFN(const ZRegister& zd, \
|
|
const ZRegister& za, \
|
|
const ZRegister& zn, \
|
|
const ZRegister& zm, \
|
|
int index, \
|
|
int rot) { \
|
|
if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
|
|
UseScratchRegisterScope temps(this); \
|
|
ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \
|
|
{ \
|
|
MovprfxHelperScope guard(this, ztmp, za); \
|
|
ASMFN(ztmp, zn, zm, index, rot); \
|
|
} \
|
|
Mov(zd, ztmp); \
|
|
} else { \
|
|
MovprfxHelperScope guard(this, zd, za); \
|
|
ASMFN(zd, zn, zm, index, rot); \
|
|
} \
|
|
}
|
|
VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
#undef VIXL_DEFINE_MASM_FUNC
|
|
|
|
// Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
// and for which no reversed form is provided.
|
|
#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
|
|
V(Addp, addp) \
|
|
V(Bic, bic) \
|
|
V(Faddp, faddp) \
|
|
V(Fmaxnmp, fmaxnmp) \
|
|
V(Fminnmp, fminnmp) \
|
|
V(Fmaxp, fmaxp) \
|
|
V(Fminp, fminp) \
|
|
V(Fscale, fscale) \
|
|
V(Smaxp, smaxp) \
|
|
V(Sminp, sminp) \
|
|
V(Suqadd, suqadd) \
|
|
V(Umaxp, umaxp) \
|
|
V(Uminp, uminp) \
|
|
V(Usqadd, usqadd)
|
|
|
|
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
|
|
void MacroAssembler::MASMFN(const ZRegister& zd, \
|
|
const PRegisterM& pg, \
|
|
const ZRegister& zn, \
|
|
const ZRegister& zm) { \
|
|
VIXL_ASSERT(allow_macro_instructions_); \
|
|
if (zd.Aliases(zm) && !zd.Aliases(zn)) { \
|
|
UseScratchRegisterScope temps(this); \
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
|
|
Mov(scratch, zm); \
|
|
MovprfxHelperScope guard(this, zd, pg, zn); \
|
|
ASMFN(zd, pg, zd, scratch); \
|
|
} else { \
|
|
MovprfxHelperScope guard(this, zd, pg, zn); \
|
|
ASMFN(zd, pg, zd, zm); \
|
|
} \
|
|
}
|
|
VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
#undef VIXL_DEFINE_MASM_FUNC
|
|
|
|
// Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
// and for which a reversed form is provided.
|
|
#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
|
|
V(Asr, asr) \
|
|
V(Fdiv, fdiv) \
|
|
V(Fsub, fsub) \
|
|
V(Lsl, lsl) \
|
|
V(Lsr, lsr) \
|
|
V(Sdiv, sdiv) \
|
|
V(Shsub, shsub) \
|
|
V(Sqrshl, sqrshl) \
|
|
V(Sqshl, sqshl) \
|
|
V(Sqsub, sqsub) \
|
|
V(Srshl, srshl) \
|
|
V(Sub, sub) \
|
|
V(Udiv, udiv) \
|
|
V(Uhsub, uhsub) \
|
|
V(Uqrshl, uqrshl) \
|
|
V(Uqshl, uqshl) \
|
|
V(Uqsub, uqsub) \
|
|
V(Urshl, urshl)
|
|
|
|
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
|
|
void MacroAssembler::MASMFN(const ZRegister& zd, \
|
|
const PRegisterM& pg, \
|
|
const ZRegister& zn, \
|
|
const ZRegister& zm) { \
|
|
VIXL_ASSERT(allow_macro_instructions_); \
|
|
NoncommutativeArithmeticHelper(zd, \
|
|
pg, \
|
|
zn, \
|
|
zm, \
|
|
static_cast<SVEArithPredicatedFn>( \
|
|
&Assembler::ASMFN), \
|
|
static_cast<SVEArithPredicatedFn>( \
|
|
&Assembler::ASMFN##r)); \
|
|
}
|
|
VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
#undef VIXL_DEFINE_MASM_FUNC
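
// For the macros defined just above, NoncommutativeArithmeticHelper selects the
// reversed form when the destination aliases the second source. For example:
//   Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // "movprfx z0.s, p0/m, z1.s;
//                                                      //  fsub z0.s, p0/m, z0.s, z2.s".
//   Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS());  // zd aliases zm:
//                                                      // "fsubr z0.s, p0/m, z0.s, z1.s".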
|
|
|
|
void MacroAssembler::Fadd(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fadd),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fabd(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fabd),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fmul(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fmul),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fmulx(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fmulx),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fmax(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fmax),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fmin(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fmin),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fmaxnm(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fmaxnm),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fminnm(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
FPMacroNaNPropagationOption nan_option) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
FPCommutativeArithmeticHelper(zd,
|
|
pg,
|
|
zn,
|
|
zm,
|
|
static_cast<SVEArithPredicatedFn>(
|
|
&Assembler::fminnm),
|
|
nan_option);
|
|
}
|
|
|
|
void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
|
|
switch (zd.GetLaneSizeInBits()) {
|
|
case kHRegSize:
|
|
Fdup(zd, Float16(imm));
|
|
break;
|
|
case kSRegSize:
|
|
Fdup(zd, static_cast<float>(imm));
|
|
break;
|
|
case kDRegSize:
|
|
uint64_t bits = DoubleToRawbits(imm);
|
|
if (IsImmFP64(bits)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fdup(zd, imm);
|
|
} else {
|
|
Dup(zd, bits);
|
|
}
|
|
break;
|
|
}
|
|
}
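
// Fdup examples (approximate):
//   Fdup(z0.VnD(), 1.0);  // Encodable FP8 immediate: "fdup z0.d, #1.0".
//   Fdup(z0.VnD(), 1.1);  // Not encodable; falls back to Dup of the raw 64-bit
//                         // pattern via a scratch register.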
|
|
|
|
void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
|
|
switch (zd.GetLaneSizeInBits()) {
|
|
case kHRegSize:
|
|
Fdup(zd, Float16(imm));
|
|
break;
|
|
case kSRegSize:
|
|
if (IsImmFP32(imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fdup(zd, imm);
|
|
} else {
|
|
Dup(zd, FloatToRawbits(imm));
|
|
}
|
|
break;
|
|
case kDRegSize:
|
|
Fdup(zd, static_cast<double>(imm));
|
|
break;
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
|
|
switch (zd.GetLaneSizeInBits()) {
|
|
case kHRegSize:
|
|
if (IsImmFP16(imm)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
fdup(zd, imm);
|
|
} else {
|
|
Dup(zd, Float16ToRawbits(imm));
|
|
}
|
|
break;
|
|
case kSRegSize:
|
|
Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
|
|
break;
|
|
case kDRegSize:
|
|
Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
|
|
break;
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Index(const ZRegister& zd,
|
|
const Operand& start,
|
|
const Operand& step) {
|
|
class IndexOperand : public Operand {
|
|
public:
|
|
static IndexOperand Prepare(MacroAssembler* masm,
|
|
UseScratchRegisterScope* temps,
|
|
const Operand& op,
|
|
const ZRegister& zd_inner) {
|
|
// Look for encodable immediates.
|
|
int imm;
|
|
if (op.IsImmediate()) {
|
|
if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
|
|
return IndexOperand(imm);
|
|
}
|
|
Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
|
|
masm->Mov(scratch, op);
|
|
return IndexOperand(scratch);
|
|
} else {
|
|
// Plain registers can be encoded directly.
|
|
VIXL_ASSERT(op.IsPlainRegister());
|
|
return IndexOperand(op.GetRegister());
|
|
}
|
|
}
|
|
|
|
int GetImm5() const {
|
|
int64_t imm = GetImmediate();
|
|
VIXL_ASSERT(IsInt5(imm));
|
|
return static_cast<int>(imm);
|
|
}
|
|
|
|
private:
|
|
explicit IndexOperand(const Register& reg) : Operand(reg) {}
|
|
explicit IndexOperand(int64_t imm) : Operand(imm) {}
|
|
};
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
|
|
IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
if (start_enc.IsImmediate()) {
|
|
if (step_enc.IsImmediate()) {
|
|
index(zd, start_enc.GetImm5(), step_enc.GetImm5());
|
|
} else {
|
|
index(zd, start_enc.GetImm5(), step_enc.GetRegister());
|
|
}
|
|
} else {
|
|
if (step_enc.IsImmediate()) {
|
|
index(zd, start_enc.GetRegister(), step_enc.GetImm5());
|
|
} else {
|
|
index(zd, start_enc.GetRegister(), step_enc.GetRegister());
|
|
}
|
|
}
|
|
}
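
// Index examples (a sketch):
//   Index(z0.VnB(), 0, 1);     // Both operands fit in imm5: "index z0.b, #0, #1".
//   Index(z0.VnH(), w1, 100);  // 100 does not fit in imm5; it is first moved
//                              // into a scratch W register.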
|
|
|
|
void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(imm.FitsInLane(zdn));
|
|
|
|
if (imm.IsZero()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
insr(zdn, xzr);
|
|
return;
|
|
}
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireRegisterToHoldLane(zdn);
|
|
|
|
// TODO: There are many cases where we could optimise immediates, such as by
|
|
// detecting repeating patterns or FP immediates. We should optimise and
|
|
// abstract this for use in other SVE mov-immediate-like macros.
|
|
Mov(scratch, imm);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
insr(zdn, scratch);
|
|
}
|
|
|
|
void MacroAssembler::Mla(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (zd.Aliases(za)) {
|
|
// zda = zda + (zn * zm)
|
|
SingleEmissionCheckScope guard(this);
|
|
mla(zd, pg, zn, zm);
|
|
} else if (zd.Aliases(zn)) {
|
|
// zdn = za + (zdn * zm)
|
|
SingleEmissionCheckScope guard(this);
|
|
mad(zd, pg, zm, za);
|
|
} else if (zd.Aliases(zm)) {
|
|
// Multiplication is commutative, so we can swap zn and zm.
|
|
// zdm = za + (zdm * zn)
|
|
SingleEmissionCheckScope guard(this);
|
|
mad(zd, pg, zn, za);
|
|
} else {
|
|
// zd = za + (zn * zm)
|
|
ExactAssemblyScope guard(this, 2 * kInstructionSize);
|
|
movprfx(zd, pg, za);
|
|
mla(zd, pg, zn, zm);
|
|
}
|
|
}
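
// Mla alias handling, for example:
//   Mla(z0.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS());  // zd == za:
//       // "mla z0.s, p0/m, z1.s, z2.s".
//   Mla(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), z2.VnS());  // zd == zn:
//       // "mad z0.s, p0/m, z2.s, z1.s".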
|
|
|
|
void MacroAssembler::Mls(const ZRegister& zd,
|
|
const PRegisterM& pg,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (zd.Aliases(za)) {
|
|
// zda = zda - (zn * zm)
|
|
SingleEmissionCheckScope guard(this);
|
|
mls(zd, pg, zn, zm);
|
|
} else if (zd.Aliases(zn)) {
|
|
// zdn = za - (zdn * zm)
|
|
SingleEmissionCheckScope guard(this);
|
|
msb(zd, pg, zm, za);
|
|
} else if (zd.Aliases(zm)) {
|
|
// Multiplication is commutative, so we can swap zn and zm.
|
|
// zdm = za - (zdm * zn)
|
|
SingleEmissionCheckScope guard(this);
|
|
msb(zd, pg, zn, za);
|
|
} else {
|
|
// zd = za - (zn * zm)
|
|
ExactAssemblyScope guard(this, 2 * kInstructionSize);
|
|
movprfx(zd, pg, za);
|
|
mls(zd, pg, zn, zm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::CompareHelper(Condition cond,
|
|
const PRegisterWithLaneSize& pd,
|
|
const PRegisterZ& pg,
|
|
const ZRegister& zn,
|
|
IntegerOperand imm) {
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
|
|
Dup(zm, imm);
|
|
SingleEmissionCheckScope guard(this);
|
|
cmp(cond, pd, pg, zn, zm);
|
|
}
|
|
|
|
void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
|
|
const PRegister& pg,
|
|
const PRegisterWithLaneSize& pn) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(pd.IsLaneSizeB());
|
|
VIXL_ASSERT(pn.IsLaneSizeB());
|
|
if (pd.Is(pn)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
pfirst(pd, pg, pn);
|
|
} else {
|
|
UseScratchRegisterScope temps(this);
|
|
PRegister temp_pg = pg;
|
|
if (pd.Aliases(pg)) {
|
|
temp_pg = temps.AcquireP();
|
|
Mov(temp_pg.VnB(), pg.VnB());
|
|
}
|
|
Mov(pd, pn);
|
|
SingleEmissionCheckScope guard(this);
|
|
pfirst(pd, temp_pg, pd);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
|
|
const PRegister& pg,
|
|
const PRegisterWithLaneSize& pn) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(AreSameFormat(pd, pn));
|
|
if (pd.Is(pn)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
pnext(pd, pg, pn);
|
|
} else {
|
|
UseScratchRegisterScope temps(this);
|
|
PRegister temp_pg = pg;
|
|
if (pd.Aliases(pg)) {
|
|
temp_pg = temps.AcquireP();
|
|
Mov(temp_pg.VnB(), pg.VnB());
|
|
}
|
|
Mov(pd.VnB(), pn.VnB());
|
|
SingleEmissionCheckScope guard(this);
|
|
pnext(pd, temp_pg, pd);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
|
|
SVEPredicateConstraint pattern,
|
|
FlagsUpdate s) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
switch (s) {
|
|
case LeaveFlags:
|
|
Ptrue(pd, pattern);
|
|
return;
|
|
case SetFlags:
|
|
Ptrues(pd, pattern);
|
|
return;
|
|
}
|
|
VIXL_UNREACHABLE();
|
|
}
|
|
|
|
void MacroAssembler::Sub(const ZRegister& zd,
|
|
IntegerOperand imm,
|
|
const ZRegister& zm) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
|
|
int imm8;
|
|
int shift = -1;
|
|
if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
|
|
imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
|
|
MovprfxHelperScope guard(this, zd, zm);
|
|
subr(zd, zd, imm8, shift);
|
|
} else {
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
|
|
Dup(scratch, imm);
|
|
|
|
SingleEmissionCheckScope guard(this);
|
|
sub(zd, scratch, zm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr,
|
|
SVELoadBroadcastFn fn,
|
|
int divisor) {
|
|
VIXL_ASSERT(addr.IsScalarPlusImmediate());
|
|
int64_t imm = addr.GetImmediateOffset();
|
|
if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
} else {
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireX();
|
|
CalculateSVEAddress(scratch, addr, zt);
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, SVEMemOperand(scratch));
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
|
|
const SVEMemOperand& addr,
|
|
SVELoadStoreFn fn) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());
|
|
|
|
if (addr.IsPlainScalar() ||
|
|
(addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
|
|
addr.IsMulVl())) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(rt, addr);
|
|
return;
|
|
}
|
|
|
|
if (addr.IsEquivalentToScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
|
|
return;
|
|
}
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireX();
|
|
CalculateSVEAddress(scratch, addr, rt);
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(rt, SVEMemOperand(scratch));
|
|
}
|
|
|
|
template <typename Tg, typename Tf>
|
|
void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
|
|
const ZRegister& zt,
|
|
const Tg& pg,
|
|
const SVEMemOperand& addr,
|
|
Tf fn,
|
|
int imm_bits,
|
|
int shift_amount,
|
|
SVEOffsetModifier supported_modifier,
|
|
int vl_divisor_log2) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
int imm_divisor = 1 << shift_amount;
|
|
|
|
if (addr.IsPlainScalar() ||
|
|
(addr.IsScalarPlusImmediate() &&
|
|
IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
|
|
((addr.GetImmediateOffset() % imm_divisor) == 0) &&
|
|
(addr.GetOffsetModifier() == supported_modifier))) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
|
|
if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
|
|
addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
|
|
if (addr.IsEquivalentToScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
|
|
return;
|
|
}
|
|
|
|
if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
|
|
(vl_divisor_log2 == -1)) {
|
|
// We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL
|
|
// dependent.
|
|
VIXL_UNIMPLEMENTED();
|
|
}
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
Register scratch = temps.AcquireX();
|
|
CalculateSVEAddress(scratch, addr, vl_divisor_log2);
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, SVEMemOperand(scratch));
|
|
}
|
|
|
|
template <typename Tg, typename Tf>
|
|
void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
|
|
const ZRegister& zt,
|
|
const Tg& pg,
|
|
const SVEMemOperand& addr,
|
|
Tf fn) {
|
|
if (addr.IsPlainScalar() ||
|
|
(addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
|
|
addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
|
|
(addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
|
|
addr.IsMulVl())) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
|
|
if (addr.IsEquivalentToScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
|
|
return;
|
|
}
|
|
|
|
if (addr.IsVectorPlusImmediate()) {
|
|
uint64_t offset = addr.GetImmediateOffset();
|
|
if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
|
|
IsUint5(offset >> msize_in_bytes_log2)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (addr.IsScalarPlusVector()) {
|
|
VIXL_ASSERT(addr.IsScatterGather());
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
|
|
UseScratchRegisterScope temps(this);
|
|
if (addr.IsScatterGather()) {
|
|
// In scatter-gather modes, zt and zn/zm have the same lane size. However,
|
|
// for 32-bit accesses, the result of each lane's address calculation still
|
|
// requires 64 bits; we can't naively use `Adr` for the address calculation
|
|
// because it would truncate each address to 32 bits.
|
|
|
|
if (addr.IsVectorPlusImmediate()) {
|
|
// Synthesise the immediate in an X register, then use a
|
|
// scalar-plus-vector access with the original vector.
|
|
Register scratch = temps.AcquireX();
|
|
Mov(scratch, addr.GetImmediateOffset());
|
|
SingleEmissionCheckScope guard(this);
|
|
SVEOffsetModifier om =
|
|
zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
|
|
(this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
|
|
return;
|
|
}
|
|
|
|
VIXL_UNIMPLEMENTED();
|
|
} else {
|
|
Register scratch = temps.AcquireX();
|
|
// TODO: If we have an immediate offset that is a multiple of
|
|
// msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
|
|
// save an instruction.
|
|
int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
|
|
CalculateSVEAddress(scratch, addr, vl_divisor_log2);
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, SVEMemOperand(scratch));
|
|
}
|
|
}
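
// For example, Ld1w (defined below using the helper above) behaves roughly as:
//   Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
//       // Encodable directly as a scalar-plus-immediate form.
//   Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 100, SVE_MUL_VL));
//       // Offset out of range; the address is first computed into a scratch
//       // X register.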
|
|
|
|
template <typename Tf>
|
|
void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
|
|
const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr,
|
|
Tf fn) {
|
|
if (addr.IsScatterGather()) {
|
|
// Scatter-gather first-fault loads share encodings with normal loads.
|
|
SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
|
|
return;
|
|
}
|
|
|
|
// Contiguous first-faulting loads have no scalar-plus-immediate form at all,
|
|
// so we don't do immediate synthesis.
|
|
|
|
// We cannot currently distinguish "[x0]" from "[x0, #0]", and this
|
|
// is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
|
|
if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
|
|
addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zt, pg, addr);
|
|
return;
|
|
}
|
|
|
|
VIXL_UNIMPLEMENTED();
|
|
}
|
|
|
|
void MacroAssembler::Ld1b(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kBRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1b));
|
|
}
|
|
|
|
void MacroAssembler::Ld1h(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kHRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1h));
|
|
}
|
|
|
|
void MacroAssembler::Ld1w(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kWRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1w));
|
|
}
|
|
|
|
void MacroAssembler::Ld1d(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kDRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1d));
|
|
}
|
|
|
|
void MacroAssembler::Ld1sb(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kBRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1sb));
|
|
}
|
|
|
|
void MacroAssembler::Ld1sh(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kHRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1sh));
|
|
}
|
|
|
|
void MacroAssembler::Ld1sw(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kSRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ld1sw));
|
|
}
|
|
|
|
void MacroAssembler::St1b(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kBRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVEStore1Fn>(&Assembler::st1b));
|
|
}
|
|
|
|
void MacroAssembler::St1h(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kHRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVEStore1Fn>(&Assembler::st1h));
|
|
}
|
|
|
|
void MacroAssembler::St1w(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kSRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVEStore1Fn>(&Assembler::st1w));
|
|
}
|
|
|
|
void MacroAssembler::St1d(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadStore1Helper(kDRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVEStore1Fn>(&Assembler::st1d));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1b(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kBRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1b));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1h(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kHRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1h));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1w(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kSRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1w));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1d(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kDRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1d));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1sb(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kBRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1sh(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kHRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
|
|
}
|
|
|
|
void MacroAssembler::Ldff1sw(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
SVELoadFFHelper(kSRegSizeInBytesLog2,
|
|
zt,
|
|
pg,
|
|
addr,
|
|
static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
|
|
}
|
|
|
|
#define VIXL_SVE_LD1R_LIST(V) \
|
|
V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
|
|
|
|
#define VIXL_DEFINE_MASM_FUNC(SZ, SH) \
|
|
void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \
|
|
const PRegisterZ& pg, \
|
|
const SVEMemOperand& addr) { \
|
|
VIXL_ASSERT(allow_macro_instructions_); \
|
|
SVELoadStoreNTBroadcastQOHelper(zt, \
|
|
pg, \
|
|
addr, \
|
|
&MacroAssembler::ld1r##SZ, \
|
|
4, \
|
|
SH, \
|
|
NO_SVE_OFFSET_MODIFIER, \
|
|
-1); \
|
|
}
|
|
|
|
VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
|
|
#undef VIXL_DEFINE_MASM_FUNC
|
|
#undef VIXL_SVE_LD1R_LIST
|
|
|
|
void MacroAssembler::Ldnt1b(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
ldnt1b(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::ldnt1b,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Ldnt1d(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
ldnt1d(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::ldnt1d,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Ldnt1h(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
ldnt1h(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::ldnt1h,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Ldnt1w(const ZRegister& zt,
|
|
const PRegisterZ& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
ldnt1w(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::ldnt1w,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::Stnt1b(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
stnt1b(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::stnt1b,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
void MacroAssembler::Stnt1d(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
stnt1d(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::stnt1d,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
void MacroAssembler::Stnt1h(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
stnt1h(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::stnt1h,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
void MacroAssembler::Stnt1w(const ZRegister& zt,
|
|
const PRegister& pg,
|
|
const SVEMemOperand& addr) {
|
|
VIXL_ASSERT(allow_macro_instructions_);
|
|
if (addr.IsVectorPlusScalar()) {
|
|
SingleEmissionCheckScope guard(this);
|
|
stnt1w(zt, pg, addr);
|
|
} else {
|
|
SVELoadStoreNTBroadcastQOHelper(zt,
|
|
pg,
|
|
addr,
|
|
&MacroAssembler::stnt1w,
|
|
4,
|
|
0,
|
|
SVE_MUL_VL);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
|
|
const ZRegister& zd,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
int index) {
|
|
if (zd.Aliases(za)) {
|
|
// zda = zda + (zn . zm)
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zd, zn, zm, index);
|
|
|
|
} else if (zd.Aliases(zn) || zd.Aliases(zm)) {
|
|
// zdn = za + (zdn . zm[index])
|
|
// zdm = za + (zn . zdm[index])
|
|
// zdnm = za + (zdnm . zdnm[index])
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
|
|
{
|
|
MovprfxHelperScope guard(this, scratch, za);
|
|
(this->*fn)(scratch, zn, zm, index);
|
|
}
|
|
|
|
Mov(zd, scratch);
|
|
} else {
|
|
// zd = za + (zn . zm)
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zn, zm, index);
|
|
}
|
|
}
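
// Indexed dot-product example (a sketch):
//   Sdot(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB(), 1);  // zd == za:
//       // "sdot z0.s, z1.b, z2.b[1]".
//   Sdot(z0.VnS(), z3.VnS(), z0.VnB(), z2.VnB(), 1);  // zd aliases zn: the result
//       // is built in a scratch register (movprfx from z3) and then moved to z0.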
|
|
|
|
void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
|
|
const ZRegister& zd,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm) {
|
|
if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
|
|
// zd = za . zd . zm
|
|
// zd = za . zn . zd
|
|
// zd = za . zd . zd
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
|
|
{
|
|
MovprfxHelperScope guard(this, scratch, za);
|
|
(this->*fn)(scratch, zn, zm);
|
|
}
|
|
|
|
Mov(zd, scratch);
|
|
} else {
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zn, zm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
|
|
const ZRegister& zd,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm) {
|
|
if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
|
|
// zd = za . zd . zm
|
|
// zd = za . zn . zd
|
|
// zd = za . zd . zd
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
|
|
{
|
|
MovprfxHelperScope guard(this, scratch, za);
|
|
(this->*fn)(scratch, scratch, zn, zm);
|
|
}
|
|
|
|
Mov(zd, scratch);
|
|
} else {
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zd, zn, zm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
|
|
const ZRegister& zd,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm,
|
|
int imm) {
|
|
if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
|
|
// zd = za . zd . zm[i]
|
|
// zd = za . zn . zd[i]
|
|
// zd = za . zd . zd[i]
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
|
|
{
|
|
MovprfxHelperScope guard(this, scratch, za);
|
|
(this->*fn)(scratch, zn, zm, imm);
|
|
}
|
|
|
|
Mov(zd, scratch);
|
|
} else {
|
|
// zd = za . zn . zm[i]
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zn, zm, imm);
|
|
}
|
|
}
|
|
|
|
void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
|
|
const ZRegister& zd,
|
|
const ZRegister& za,
|
|
const ZRegister& zn,
|
|
const ZRegister& zm) {
|
|
if (zn.Aliases(zm)) {
|
|
// If zn == zm, the difference is zero.
|
|
if (!zd.Aliases(za)) {
|
|
Mov(zd, za);
|
|
}
|
|
} else if (zd.Aliases(za)) {
|
|
SingleEmissionCheckScope guard(this);
|
|
(this->*fn)(zd, zn, zm);
|
|
} else if (zd.Aliases(zn)) {
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
|
|
Mov(ztmp, zn);
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, ztmp, zm);
|
|
} else if (zd.Aliases(zm)) {
|
|
UseScratchRegisterScope temps(this);
|
|
ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
|
|
Mov(ztmp, zm);
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zn, ztmp);
|
|
} else {
|
|
MovprfxHelperScope guard(this, zd, za);
|
|
(this->*fn)(zd, zn, zm);
|
|
}
|
|
}
|
|
|
|
#define VIXL_SVE_4REG_LIST(V) \
|
|
V(Saba, saba, AbsoluteDifferenceAccumulate) \
|
|
V(Uaba, uaba, AbsoluteDifferenceAccumulate) \
|
|
V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \
|
|
V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \
|
|
V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \
|
|
V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \
|
|
V(Sdot, sdot, FourRegDestructiveHelper) \
|
|
V(Udot, udot, FourRegDestructiveHelper) \
|
|
V(Adclb, adclb, FourRegDestructiveHelper) \
|
|
V(Adclt, adclt, FourRegDestructiveHelper) \
|
|
V(Sbclb, sbclb, FourRegDestructiveHelper) \
|
|
V(Sbclt, sbclt, FourRegDestructiveHelper) \
|
|
V(Smlalb, smlalb, FourRegDestructiveHelper) \
|
|
V(Smlalt, smlalt, FourRegDestructiveHelper) \
|
|
V(Smlslb, smlslb, FourRegDestructiveHelper) \
|
|
V(Smlslt, smlslt, FourRegDestructiveHelper) \
|
|
V(Umlalb, umlalb, FourRegDestructiveHelper) \
|
|
V(Umlalt, umlalt, FourRegDestructiveHelper) \
|
|
V(Umlslb, umlslb, FourRegDestructiveHelper) \
|
|
V(Umlslt, umlslt, FourRegDestructiveHelper) \
|
|
V(Bcax, bcax, FourRegDestructiveHelper) \
|
|
V(Bsl, bsl, FourRegDestructiveHelper) \
|
|
V(Bsl1n, bsl1n, FourRegDestructiveHelper) \
|
|
V(Bsl2n, bsl2n, FourRegDestructiveHelper) \
|
|
V(Eor3, eor3, FourRegDestructiveHelper) \
|
|
V(Nbsl, nbsl, FourRegDestructiveHelper) \
|
|
V(Fmlalb, fmlalb, FourRegDestructiveHelper) \
|
|
V(Fmlalt, fmlalt, FourRegDestructiveHelper) \
|
|
V(Fmlslb, fmlslb, FourRegDestructiveHelper) \
|
|
V(Fmlslt, fmlslt, FourRegDestructiveHelper) \
|
|
V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \
|
|
V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
|
|
V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \
|
|
V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \
|
|
V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
|
|
V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \
|
|
V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \
|
|
V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \
|
|
V(Fmmla, fmmla, FourRegDestructiveHelper) \
|
|
V(Smmla, smmla, FourRegDestructiveHelper) \
|
|
V(Ummla, ummla, FourRegDestructiveHelper) \
|
|
V(Usmmla, usmmla, FourRegDestructiveHelper) \
|
|
V(Usdot, usdot, FourRegDestructiveHelper)
|
|
|
|
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
|
|
void MacroAssembler::MASMFN(const ZRegister& zd, \
|
|
const ZRegister& za, \
|
|
const ZRegister& zn, \
|
|
const ZRegister& zm) { \
|
|
VIXL_ASSERT(allow_macro_instructions_); \
|
|
HELPER(&Assembler::ASMFN, zd, za, zn, zm); \
|
|
}
|
|
VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
|
|
#undef VIXL_DEFINE_MASM_FUNC

#define VIXL_SVE_4REG_1IMM_LIST(V) \
  V(Fmla, fmla, FourRegOneImmDestructiveHelper) \
  V(Fmls, fmls, FourRegOneImmDestructiveHelper) \
  V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \
  V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \
  V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \
  V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \
  V(Mla, mla, FourRegOneImmDestructiveHelper) \
  V(Mls, mls, FourRegOneImmDestructiveHelper) \
  V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \
  V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \
  V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \
  V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \
  V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
  V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \
  V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \
  V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \
  V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
  void MacroAssembler::MASMFN(const ZRegister& zd, \
                              const ZRegister& za, \
                              const ZRegister& zn, \
                              const ZRegister& zm, \
                              int imm) { \
    VIXL_ASSERT(allow_macro_instructions_); \
    HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \
  }
VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC
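
// For reference, each entry above generates an immediate/index overload; for
// example, V(Fmla, fmla, FourRegOneImmDestructiveHelper) produces (roughly):
//
//   void MacroAssembler::Fmla(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm,
//                             int imm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     FourRegOneImmDestructiveHelper(&Assembler::fmla, zd, za, zn, zm, imm);
//   }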

void MacroAssembler::Sdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
}

void MacroAssembler::Udot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
}

void MacroAssembler::Sudot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
}

void MacroAssembler::Usdot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
}

void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index,
                          int rot) {
  // This doesn't handle zm when it's out of the range that can be encoded in
  // the instruction. The range depends on the element size: z0-z7 for B,
  // z0-z15 for H.
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      cdot(ztmp, zn, zm, index, rot);
    }
    Mov(zd, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, index, rot);
  }
}
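
// For illustration only (lane sizes omitted): Cdot(z0, z1, z0, z2, 0, 90)
// aliases zd with zn but not with za, so the accumulation happens in a scratch
// register which is then moved into zd; Cdot(z0, z0, z1, z2, 0, 90) has
// zd == za and can accumulate into z0 in place.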

void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    VIXL_ASSERT(AreSameLaneSize(zn, zm));
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zd.Aliases(zn) ? zn : zm);
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, rot);
  }
}

void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
                                    const PRegisterM& pg,
                                    const ZRegister& za,
                                    const ZRegister& zn,
                                    const ZRegister& zm,
                                    SVEMulAddPredicatedZdaFn fn_zda,
                                    SVEMulAddPredicatedZdnFn fn_zdn,
                                    FPMacroNaNPropagationOption nan_option) {
  ResolveFPNaNPropagationOption(&nan_option);

  if (zd.Aliases(za)) {
    // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zda)(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zdn)(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    switch (nan_option) {
      case FastNaNPropagation: {
        // We treat multiplication as commutative in the fast mode, so we can
        // swap zn and zm.
        // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
        SingleEmissionCheckScope guard(this);
        (this->*fn_zdn)(zd, pg, zn, za);
        return;
      }
      case StrictNaNPropagation: {
        UseScratchRegisterScope temps(this);
        // Use a scratch register to keep the argument order exactly as
        // specified.
        ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
        {
          MovprfxHelperScope guard(this, scratch, pg, za);
          // scratch = (-)za + ((-)zn * zm)
          (this->*fn_zda)(scratch, pg, zn, zm);
        }
        Mov(zd, scratch);
        return;
      }
      case NoFPMacroNaNPropagationSelected:
        VIXL_UNREACHABLE();
        return;
    }
  } else {
    // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    MovprfxHelperScope guard(this, zd, pg, za);
    (this->*fn_zda)(zd, pg, zn, zm);
  }
}
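
// For illustration only (lane sizes omitted, pg is any merging predicate):
// Fmla(z0, pg, z0, z1, z2) has zd == za, so a single fmla is emitted;
// Fmla(z0, pg, z1, z0, z2) has zd == zn, so the operands are reordered onto
// the fmad form; when zd == zm, FastNaNPropagation swaps zn and zm onto fmad,
// while StrictNaNPropagation builds the result in a scratch register and
// copies it into zd to preserve the specified operand order.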

void MacroAssembler::Fmla(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmla,
                 &Assembler::fmad,
                 nan_option);
}

void MacroAssembler::Fmls(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmls,
                 &Assembler::fmsb,
                 nan_option);
}

void MacroAssembler::Fnmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmla,
                 &Assembler::fnmad,
                 nan_option);
}

void MacroAssembler::Fnmls(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmls,
                 &Assembler::fnmsb,
                 nan_option);
}

void MacroAssembler::Ftmad(const ZRegister& zd,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int imm3) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(scratch, zm);
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, scratch, imm3);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, zm, imm3);
  }
}

void MacroAssembler::Fcadd(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, pg, zn);
      fcadd(scratch, pg, scratch, zm, rot);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, pg, zn);
    fcadd(zd, pg, zd, zm, rot);
  }
}

void MacroAssembler::Fcmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      fcmla(ztmp, pg, zn, zm, rot);
    }
    Mov(zd, pg, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, pg, za);
    fcmla(zd, pg, zn, zm, rot);
  }
}

void MacroAssembler::Splice(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
    SingleEmissionCheckScope guard(this);
    splice(zd, pg, zn, zm);
  } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      splice(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    splice(zd, pg, zd, zm);
  }
}
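
// For illustration only: when SVE2 is available and zn, zm are consecutive
// registers (for example z1 and z2), Splice(z0, pg, z1, z2) can use the
// constructive form directly; otherwise the destructive form is used, copying
// zn into zd (or into a scratch register when zd aliases zm) before issuing
// splice.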

void MacroAssembler::Clasta(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clasta(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clasta(zd, pg, zd, zm);
  }
}

void MacroAssembler::Clastb(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clastb(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clastb(zd, pg, zd, zm);
  }
}

void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
                                          const ZRegister& zd,
                                          const ZRegister& za,
                                          const ZRegister& zn,
                                          int shift) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(za) && zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zn);
    {
      MovprfxHelperScope guard(this, zd, za);
      (this->*fn)(zd, ztmp, shift);
    }
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, shift);
  }
}
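
// For illustration only (using the Ssra wrapper defined below, lane sizes
// omitted): Ssra(z0, z1, z0, 3) aliases zd with zn but not with za, so zn is
// copied to a scratch register before the movprfx + shift-accumulate pair;
// Ssra(z0, z0, z1, 3) accumulates into z0 in place.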

void MacroAssembler::Srsra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
}

void MacroAssembler::Ssra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
}

void MacroAssembler::Ursra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
}

void MacroAssembler::Usra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
}

void MacroAssembler::ComplexAddition(ZZZImmFn fn,
                                     const ZRegister& zd,
                                     const ZRegister& zn,
                                     const ZRegister& zm,
                                     int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(zn) && zd.Aliases(zm)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(ztmp, zm);
    {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*fn)(zd, zd, ztmp, rot);
    }
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    (this->*fn)(zd, zd, zm, rot);
  }
}
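
// For illustration only (using the Cadd wrapper defined below, lane sizes
// omitted): Cadd(z0, z1, z0, 90) aliases zd with zm but not with zn, so zm is
// copied to a scratch register before the movprfx + cadd pair;
// Cadd(z0, z0, z1, 90) can use the destructive cadd form on z0 directly.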

void MacroAssembler::Cadd(const ZRegister& zd,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
}

void MacroAssembler::Sqcadd(const ZRegister& zd,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            int rot) {
  ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
}

}  // namespace aarch64
}  // namespace vixl