AArch64PostLegalizerLowering.cpp
1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
47#include <optional>
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
55using namespace llvm;
56using namespace MIPatternMatch;
57using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
66///
67/// Used for matching target-supported shuffles before codegen.
68struct ShuffleVectorPseudo {
69 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
70 Register Dst; ///< Destination register.
71 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
72 ShuffleVectorPseudo(unsigned Opc, Register Dst,
73 std::initializer_list<SrcOp> SrcOps)
74 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
75 ShuffleVectorPseudo() = default;
76};
77
78/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
79/// sources of the shuffle are different.
80std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
81 unsigned NumElts) {
82 // Look for the first non-undef element.
83 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
84 if (FirstRealElt == M.end())
85 return std::nullopt;
86
87 // Use APInt to handle overflow when calculating expected element.
88 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
89 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
90
91 // The following shuffle indices must be the successive elements after the
92 // first real element.
93 if (any_of(
94 make_range(std::next(FirstRealElt), M.end()),
95 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
96 return std::nullopt;
97
98 // The index of an EXT is the first element if it is not UNDEF.
99 // Watch out for the beginning UNDEFs. The EXT index should be the expected
100 // value of the first element. E.g.
101 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
102 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
103 // ExpectedElt is the last mask index plus 1.
104 uint64_t Imm = ExpectedElt.getZExtValue();
105 bool ReverseExt = false;
106
107 // There are two different cases that require reversing the input vectors.
108 // For example, for vector <4 x i32> we have the following cases,
109 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
110 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
111 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
112 // to reverse two input vectors.
113 if (Imm < NumElts)
114 ReverseExt = true;
115 else
116 Imm -= NumElts;
117 return std::make_pair(ReverseExt, Imm);
118}
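// A worked example of getExtMask (illustrative sketch): for a <4 x s32>
// shuffle with mask <-1, -1, 7, 0>, the first non-undef element is 7, so the
// mask is treated as <5, 6, 7, 0> and ExpectedElt ends up as 1 after wrapping
// modulo 2*NumElts. Since 1 < NumElts, the result is (ReverseExt = true,
// Imm = 1); matchEXT below then swaps the sources and scales Imm by the
// element size in bytes, producing G_EXT %dst, %v2, %v1, 4.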
119
120/// Helper function for matchINS.
121///
122/// \returns a value when \p M is an ins mask for \p NumInputElements.
123///
124/// First element of the returned pair is true when the produced
125/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
126///
127/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
128std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
129 int NumInputElements) {
130 if (M.size() != static_cast<size_t>(NumInputElements))
131 return std::nullopt;
132 int NumLHSMatch = 0, NumRHSMatch = 0;
133 int LastLHSMismatch = -1, LastRHSMismatch = -1;
134 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
135 if (M[Idx] == -1) {
136 ++NumLHSMatch;
137 ++NumRHSMatch;
138 continue;
139 }
140 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
141 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
142 }
143 const int NumNeededToMatch = NumInputElements - 1;
144 if (NumLHSMatch == NumNeededToMatch)
145 return std::make_pair(true, LastLHSMismatch);
146 if (NumRHSMatch == NumNeededToMatch)
147 return std::make_pair(false, LastRHSMismatch);
148 return std::nullopt;
149}
150
151/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
152/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
153bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
154 ShuffleVectorPseudo &MatchInfo) {
155 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
156 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
157 Register Dst = MI.getOperand(0).getReg();
158 Register Src = MI.getOperand(1).getReg();
159 LLT Ty = MRI.getType(Dst);
160 unsigned EltSize = Ty.getScalarSizeInBits();
161
162 // Element size for a rev cannot be 64.
163 if (EltSize == 64)
164 return false;
165
166 unsigned NumElts = Ty.getNumElements();
167
168 // Try to produce a G_REV instruction
169 for (unsigned LaneSize : {64U, 32U, 16U}) {
170 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
171 unsigned Opcode;
172 if (LaneSize == 64U)
173 Opcode = AArch64::G_REV64;
174 else if (LaneSize == 32U)
175 Opcode = AArch64::G_REV32;
176 else
177 Opcode = AArch64::G_REV16;
178
179 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
180 return true;
181 }
182 }
183
184 return false;
185}
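// Illustrative example: a <4 x s16> G_SHUFFLE_VECTOR with mask <1, 0, 3, 2>
// reverses the 16-bit elements within each 32-bit block, i.e. it is a REV mask
// for LaneSize 32, so this match produces G_REV32 %dst, %src.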
186
187/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
188/// a G_TRN1 or G_TRN2 instruction.
189bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
190 ShuffleVectorPseudo &MatchInfo) {
191 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
192 unsigned WhichResult;
193 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
194 Register Dst = MI.getOperand(0).getReg();
195 unsigned NumElts = MRI.getType(Dst).getNumElements();
196 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
197 return false;
198 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
199 Register V1 = MI.getOperand(1).getReg();
200 Register V2 = MI.getOperand(2).getReg();
201 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
202 return true;
203}
204
205/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
206/// a G_UZP1 or G_UZP2 instruction.
207///
208/// \param [in] MI - The shuffle vector instruction.
209/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
210bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
211 ShuffleVectorPseudo &MatchInfo) {
212 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
213 unsigned WhichResult;
214 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
215 Register Dst = MI.getOperand(0).getReg();
216 unsigned NumElts = MRI.getType(Dst).getNumElements();
217 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
218 return false;
219 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
220 Register V1 = MI.getOperand(1).getReg();
221 Register V2 = MI.getOperand(2).getReg();
222 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
223 return true;
224}
225
226bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
227 ShuffleVectorPseudo &MatchInfo) {
228 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
229 unsigned WhichResult;
230 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
231 Register Dst = MI.getOperand(0).getReg();
232 unsigned NumElts = MRI.getType(Dst).getNumElements();
233 if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
234 return false;
235 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
236 Register V1 = MI.getOperand(1).getReg();
237 Register V2 = MI.getOperand(2).getReg();
238 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
239 return true;
240}
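// Illustrative example: with two <4 x s32> sources, the mask <0, 4, 1, 5>
// interleaves the low halves and matches G_ZIP1, while <2, 6, 3, 7>
// interleaves the high halves and matches G_ZIP2.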
241
242/// Helper function for matchDup.
243bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
244 MachineRegisterInfo &MRI,
245 ShuffleVectorPseudo &MatchInfo) {
246 if (Lane != 0)
247 return false;
248
249 // Try to match a vector splat operation into a dup instruction.
250 // We're looking for this pattern:
251 //
252 // %scalar:gpr(s64) = COPY $x0
253 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
254 // %cst0:gpr(s32) = G_CONSTANT i32 0
255 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
256 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
257 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
258 // %zerovec(<2 x s32>)
259 //
260 // ...into:
261 // %splat = G_DUP %scalar
262
263 // Begin matching the insert.
264 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
265 MI.getOperand(1).getReg(), MRI);
266 if (!InsMI)
267 return false;
268 // Match the undef vector operand.
269 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
270 MRI))
271 return false;
272
273 // Match the index constant 0.
274 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
275 return false;
276
277 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
278 {InsMI->getOperand(2).getReg()});
279 return true;
280}
281
282/// Helper function for matchDup.
283bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
284 MachineRegisterInfo &MRI,
285 ShuffleVectorPseudo &MatchInfo) {
286 assert(Lane >= 0 && "Expected positive lane?");
287 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
288 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
289 // lane's definition directly.
290 auto *BuildVecMI =
291 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
292 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
293 // If Lane >= NumElements then it points into the RHS; adjust it to index the RHS.
294 if (NumElements <= Lane)
295 Lane -= NumElements;
296
297 if (!BuildVecMI)
298 return false;
299 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
300 MatchInfo =
301 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
302 return true;
303}
304
305bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
306 ShuffleVectorPseudo &MatchInfo) {
307 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
308 auto MaybeLane = getSplatIndex(MI);
309 if (!MaybeLane)
310 return false;
311 int Lane = *MaybeLane;
312 // If this is undef splat, generate it via "just" vdup, if possible.
313 if (Lane < 0)
314 Lane = 0;
315 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
316 return true;
317 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
318 return true;
319 return false;
320}
321
322// Check if an EXT instruction can handle the shuffle mask when the vector
323// sources of the shuffle are the same.
324bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
325 unsigned NumElts = Ty.getNumElements();
326
327 // Assume that the first shuffle index is not UNDEF. Fail if it is.
328 if (M[0] < 0)
329 return false;
330
331 // If this is a VEXT shuffle, the immediate value is the index of the first
332 // element. The other shuffle indices must be the successive elements after
333 // the first one.
334 unsigned ExpectedElt = M[0];
335 for (unsigned I = 1; I < NumElts; ++I) {
336 // Increment the expected index. If it wraps around, just follow it
337 // back to index zero and keep going.
338 ++ExpectedElt;
339 if (ExpectedElt == NumElts)
340 ExpectedElt = 0;
341
342 if (M[I] < 0)
343 continue; // Ignore UNDEF indices.
344 if (ExpectedElt != static_cast<unsigned>(M[I]))
345 return false;
346 }
347
348 return true;
349}
350
351bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
352 ShuffleVectorPseudo &MatchInfo) {
353 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
354 Register Dst = MI.getOperand(0).getReg();
355 LLT DstTy = MRI.getType(Dst);
356 Register V1 = MI.getOperand(1).getReg();
357 Register V2 = MI.getOperand(2).getReg();
358 auto Mask = MI.getOperand(3).getShuffleMask();
359 uint64_t Imm;
360 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
361 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
362
363 if (!ExtInfo) {
364 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
365 !isSingletonExtMask(Mask, DstTy))
366 return false;
367
368 Imm = Mask[0] * ExtFactor;
369 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
370 return true;
371 }
372 bool ReverseExt;
373 std::tie(ReverseExt, Imm) = *ExtInfo;
374 if (ReverseExt)
375 std::swap(V1, V2);
376 Imm *= ExtFactor;
377 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
378 return true;
379}
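// Illustrative example of the single-source path: a <4 x s32>
// G_SHUFFLE_VECTOR %v1, %undef with mask <1, 2, 3, 0> is a singleton EXT mask
// starting at element 1, so with ExtFactor = 4 bytes it becomes
// G_EXT %dst, %v1, %v1, 4.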
380
381/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
382/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
383void applyShuffleVectorPseudo(MachineInstr &MI,
384 ShuffleVectorPseudo &MatchInfo) {
385 MachineIRBuilder MIRBuilder(MI);
386 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
387 MI.eraseFromParent();
388}
389
390/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
391/// Special-cased because the constant operand must be emitted as a G_CONSTANT
392/// for the imported tablegen patterns to work.
393void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
394 MachineIRBuilder MIRBuilder(MI);
395 if (MatchInfo.SrcOps[2].getImm() == 0)
396 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
397 else {
398 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
399 auto Cst =
400 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
401 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
402 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
403 }
404 MI.eraseFromParent();
405}
406
407void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
408 Register Dst = MI.getOperand(0).getReg();
409 Register Src = MI.getOperand(1).getReg();
410 LLT DstTy = MRI.getType(Dst);
411 assert(DstTy.getSizeInBits() == 128 &&
412 "Expected 128bit vector in applyFullRev");
413 MachineIRBuilder MIRBuilder(MI);
414 auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
415 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
416 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
417 MI.eraseFromParent();
418}
419
420bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
421 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
422
423 auto ValAndVReg =
424 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
425 return !ValAndVReg;
426}
427
428void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
429 MachineIRBuilder &Builder) {
430 auto &Insert = cast<GInsertVectorElement>(MI);
431 Builder.setInstrAndDebugLoc(Insert);
432
433 Register Offset = Insert.getIndexReg();
434 LLT VecTy = MRI.getType(Insert.getReg(0));
435 LLT EltTy = MRI.getType(Insert.getElementReg());
436 LLT IdxTy = MRI.getType(Insert.getIndexReg());
437
438 if (VecTy.isScalableVector())
439 return;
440
441 // Create a stack slot and store the vector into it
442 MachineFunction &MF = Builder.getMF();
443 Align Alignment(
444 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
445 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
446 Alignment, false);
447 LLT FramePtrTy = LLT::pointer(0, 64);
448 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
449 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
450
451 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
452
453 // Get the pointer to the element, and be sure not to hit undefined behavior
454 // if the index is out of bounds.
455 assert(isPowerOf2_64(VecTy.getNumElements()) &&
456 "Expected a power-2 vector size");
457 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
458 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
459 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
460 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
461 Register EltPtr =
462 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
463 .getReg(0);
464
465 // Write the inserted element
466 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
467 // Reload the whole vector.
468 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
469 Insert.eraseFromParent();
470}
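// Rough shape of the sequence built above (an illustrative MIR-like sketch):
//   %slot:_(p0)   = G_FRAME_INDEX %stack.N
//   G_STORE %vec(<n x sT>), %slot             ; spill the whole vector
//   %clamped      = G_AND %idx, (n - 1)       ; clamp the index into range
//   %byteoff      = G_MUL %clamped, sizeof(sT)
//   %eltptr:_(p0) = G_PTR_ADD %slot, %byteoff
//   G_STORE %elt, %eltptr                     ; overwrite the selected element
//   %dst(<n x sT>) = G_LOAD %slot             ; reload the updated vector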
471
472/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
473/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
474///
475/// e.g.
476/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
477///
478/// Can be represented as
479///
480/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
481/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
482///
483bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
484 std::tuple<Register, int, Register, int> &MatchInfo) {
485 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
486 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
487 Register Dst = MI.getOperand(0).getReg();
488 int NumElts = MRI.getType(Dst).getNumElements();
489 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
490 if (!DstIsLeftAndDstLane)
491 return false;
492 bool DstIsLeft;
493 int DstLane;
494 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
495 Register Left = MI.getOperand(1).getReg();
496 Register Right = MI.getOperand(2).getReg();
497 Register DstVec = DstIsLeft ? Left : Right;
498 Register SrcVec = Left;
499
500 int SrcLane = ShuffleMask[DstLane];
501 if (SrcLane >= NumElts) {
502 SrcVec = Right;
503 SrcLane -= NumElts;
504 }
505
506 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
507 return true;
508}
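// Worked example (illustrative): with <4 x s32> operands, the mask <0, 5, 2, 3>
// matches every LHS lane except lane 1, and element 5 names lane 1 of the RHS,
// so isINSMask returns (true, 1) and the shuffle is rewritten as
//   %extract = G_EXTRACT_VECTOR_ELT %right, 1
//   %dst     = G_INSERT_VECTOR_ELT %left, %extract, 1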
509
510void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
511 MachineIRBuilder &Builder,
512 std::tuple<Register, int, Register, int> &MatchInfo) {
513 Builder.setInstrAndDebugLoc(MI);
514 Register Dst = MI.getOperand(0).getReg();
515 auto ScalarTy = MRI.getType(Dst).getElementType();
516 Register DstVec, SrcVec;
517 int DstLane, SrcLane;
518 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
519 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
520 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
521 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
522 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
523 MI.eraseFromParent();
524}
525
526/// isVShiftRImm - Check if this is a valid vector for the immediate
527/// operand of a vector shift right operation. The value must be in the range:
528/// 1 <= Value <= ElementBits for a right shift.
529static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
530 int64_t &Cnt) {
531 assert(Ty.isVector() && "vector shift count is not a vector type");
532 MachineInstr *MI = MRI.getVRegDef(Reg);
533 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
534 if (!Cst)
535 return false;
536 Cnt = *Cst;
537 int64_t ElementBits = Ty.getScalarSizeInBits();
538 return Cnt >= 1 && Cnt <= ElementBits;
539}
540
541/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
542bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
543 int64_t &Imm) {
544 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
545 MI.getOpcode() == TargetOpcode::G_LSHR);
546 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
547 if (!Ty.isVector())
548 return false;
549 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
550}
551
552void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
553 int64_t &Imm) {
554 unsigned Opc = MI.getOpcode();
555 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
556 unsigned NewOpc =
557 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
558 MachineIRBuilder MIB(MI);
559 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
560 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
561 MI.eraseFromParent();
562}
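// Illustrative example: a <4 x s32> G_LSHR whose shift amount is a splat of
// the constant 3 becomes G_VLSHR %dst, %src, 3 (the 3 is emitted as a scalar
// s32 G_CONSTANT); a G_ASHR is rewritten to G_VASHR in the same way.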
563
564/// Determine if it is possible to modify the \p RHS and predicate \p P of a
565/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
566///
567/// \returns A pair containing the updated immediate and predicate which may
568/// be used to optimize the instruction.
569///
570/// \note This assumes that the comparison has been legalized.
571std::optional<std::pair<uint64_t, CmpInst::Predicate>>
572tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
573 const MachineRegisterInfo &MRI) {
574 const auto &Ty = MRI.getType(RHS);
575 if (Ty.isVector())
576 return std::nullopt;
577 unsigned Size = Ty.getSizeInBits();
578 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
579
580 // If the RHS is not a constant, or the RHS is already a valid arithmetic
581 // immediate, then there is nothing to change.
582 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
583 if (!ValAndVReg)
584 return std::nullopt;
585 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
586 uint64_t C = OriginalC;
587 if (isLegalArithImmed(C))
588 return std::nullopt;
589
590 // We have a non-arithmetic immediate. Check if adjusting the immediate and
591 // adjusting the predicate will result in a legal arithmetic immediate.
592 switch (P) {
593 default:
594 return std::nullopt;
595 case CmpInst::ICMP_SLT:
596 case CmpInst::ICMP_SGE:
597 // Check for
598 //
599 // x slt c => x sle c - 1
600 // x sge c => x sgt c - 1
601 //
602 // When c is not the smallest possible negative number.
603 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
604 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
605 return std::nullopt;
606 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
607 C -= 1;
608 break;
609 case CmpInst::ICMP_ULT:
610 case CmpInst::ICMP_UGE:
611 // Check for
612 //
613 // x ult c => x ule c - 1
614 // x uge c => x ugt c - 1
615 //
616 // When c is not zero.
617 assert(C != 0 && "C should not be zero here!");
618 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
619 C -= 1;
620 break;
621 case CmpInst::ICMP_SLE:
622 case CmpInst::ICMP_SGT:
623 // Check for
624 //
625 // x sle c => x slt c + 1
626 // x sgt c => s sge c + 1
627 //
628 // When c is not the largest possible signed integer.
629 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
630 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
631 return std::nullopt;
632 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
633 C += 1;
634 break;
635 case CmpInst::ICMP_ULE:
636 case CmpInst::ICMP_UGT:
637 // Check for
638 //
639 // x ule c => x ult c + 1
640 // x ugt c => s uge c + 1
641 //
642 // When c is not the largest possible unsigned integer.
643 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
644 (Size == 64 && C == UINT64_MAX))
645 return std::nullopt;
646 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
647 C += 1;
648 break;
649 }
650
651 // Check if the new constant is valid, and return the updated constant and
652 // predicate if it is.
653 if (Size == 32)
654 C = static_cast<uint32_t>(C);
655 if (isLegalArithImmed(C))
656 return {{C, P}};
657
658 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
659 SmallVector<AArch64_IMM::ImmInsnModel> Insn;
660 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
661 return Insn.size();
662 };
663
664 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
665 return {{C, P}};
666
667 return std::nullopt;
668}
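// Worked example (illustrative): 4097 is not a legal arithmetic immediate
// (neither a 12-bit value nor a 12-bit value shifted left by 12), so
// "x slt 4097" is rewritten to "x sle 4096", and 4096 (1 << 12) is legal.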
669
670/// Determine whether or not it is possible to update the RHS and predicate of
671/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
672/// immediate.
673///
674/// \p MI - The G_ICMP instruction
675/// \p MatchInfo - The new RHS immediate and predicate on success
676///
677/// See tryAdjustICmpImmAndPred for valid transformations.
678bool matchAdjustICmpImmAndPred(
679 MachineInstr &MI, const MachineRegisterInfo &MRI,
680 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
681 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
682 Register RHS = MI.getOperand(3).getReg();
683 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
684 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
685 MatchInfo = *MaybeNewImmAndPred;
686 return true;
687 }
688 return false;
689}
690
691void applyAdjustICmpImmAndPred(
692 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
693 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
694 MIB.setInstrAndDebugLoc(MI);
695 MachineOperand &RHS = MI.getOperand(3);
696 MachineRegisterInfo &MRI = *MIB.getMRI();
697 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
698 MatchInfo.first);
699 Observer.changingInstr(MI);
700 RHS.setReg(Cst->getOperand(0).getReg());
701 MI.getOperand(1).setPredicate(MatchInfo.second);
702 Observer.changedInstr(MI);
703}
704
705bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
706 std::pair<unsigned, int> &MatchInfo) {
707 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
708 Register Src1Reg = MI.getOperand(1).getReg();
709 const LLT SrcTy = MRI.getType(Src1Reg);
710 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
711
712 auto LaneIdx = getSplatIndex(MI);
713 if (!LaneIdx)
714 return false;
715
716 // The lane idx should be within the first source vector.
717 if (*LaneIdx >= SrcTy.getNumElements())
718 return false;
719
720 if (DstTy != SrcTy)
721 return false;
722
723 LLT ScalarTy = SrcTy.getElementType();
724 unsigned ScalarSize = ScalarTy.getSizeInBits();
725
726 unsigned Opc = 0;
727 switch (SrcTy.getNumElements()) {
728 case 2:
729 if (ScalarSize == 64)
730 Opc = AArch64::G_DUPLANE64;
731 else if (ScalarSize == 32)
732 Opc = AArch64::G_DUPLANE32;
733 break;
734 case 4:
735 if (ScalarSize == 32)
736 Opc = AArch64::G_DUPLANE32;
737 else if (ScalarSize == 16)
738 Opc = AArch64::G_DUPLANE16;
739 break;
740 case 8:
741 if (ScalarSize == 8)
742 Opc = AArch64::G_DUPLANE8;
743 else if (ScalarSize == 16)
744 Opc = AArch64::G_DUPLANE16;
745 break;
746 case 16:
747 if (ScalarSize == 8)
748 Opc = AArch64::G_DUPLANE8;
749 break;
750 default:
751 break;
752 }
753 if (!Opc)
754 return false;
755
756 MatchInfo.first = Opc;
757 MatchInfo.second = *LaneIdx;
758 return true;
759}
760
761void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
762 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
763 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
764 Register Src1Reg = MI.getOperand(1).getReg();
765 const LLT SrcTy = MRI.getType(Src1Reg);
766
767 B.setInstrAndDebugLoc(MI);
768 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
769
770 Register DupSrc = MI.getOperand(1).getReg();
771 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
772 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
773 if (SrcTy.getSizeInBits() == 64) {
774 auto Undef = B.buildUndef(SrcTy);
775 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
776 {Src1Reg, Undef.getReg(0)})
777 .getReg(0);
778 }
779 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
780 MI.eraseFromParent();
781}
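// Illustrative example: a <4 x s32> shuffle that splats lane 1 of its first
// source becomes G_DUPLANE32 %dst, %src, 1. A 64-bit source such as <2 x s32>
// is first widened with G_CONCAT_VECTORS %src, %undef to <4 x s32>, as
// described above.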
782
783bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
784 auto &Unmerge = cast<GUnmerge>(MI);
785 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
786 const LLT SrcTy = MRI.getType(Src1Reg);
787 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
788 return false;
789 return SrcTy.isVector() && !SrcTy.isScalable() &&
790 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
791}
792
793void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
794 MachineIRBuilder &B) {
795 auto &Unmerge = cast<GUnmerge>(MI);
796 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
797 const LLT SrcTy = MRI.getType(Src1Reg);
798 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
799 "Expected a fixed length vector");
800
801 for (int I = 0; I < SrcTy.getNumElements(); ++I)
802 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
803 MI.eraseFromParent();
804}
805
806bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
807 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
808
809 // Later, during selection, we'll try to match imported patterns using
810 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
811 // G_BUILD_VECTORs which could match those patterns.
812 if (isBuildVectorAllOnes(MI, MRI) || isBuildVectorAllZeros(MI, MRI))
813 return false;
814
815 return getAArch64VectorSplat(MI, MRI).has_value();
816}
817
818void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
819 MachineIRBuilder &B) {
820 B.setInstrAndDebugLoc(MI);
821 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
822 {MI.getOperand(1).getReg()});
823 MI.eraseFromParent();
824}
825
826/// \returns how many instructions would be saved by folding a G_ICMP's shift
827/// and/or extension operations.
828unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
829 // No instructions to save if there's more than one use or no uses.
830 if (!MRI.hasOneNonDBGUse(CmpOp))
831 return 0;
832
833 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
834 auto IsSupportedExtend = [&](const MachineInstr &MI) {
835 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
836 return true;
837 if (MI.getOpcode() != TargetOpcode::G_AND)
838 return false;
839 auto ValAndVReg =
840 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
841 if (!ValAndVReg)
842 return false;
843 uint64_t Mask = ValAndVReg->Value.getZExtValue();
844 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
845 };
846
847 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
848 if (IsSupportedExtend(*Def))
849 return 1;
850
851 unsigned Opc = Def->getOpcode();
852 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
853 Opc != TargetOpcode::G_LSHR)
854 return 0;
855
856 auto MaybeShiftAmt =
857 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
858 if (!MaybeShiftAmt)
859 return 0;
860 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
861 MachineInstr *ShiftLHS =
862 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
863
864 // Check if we can fold an extend and a shift.
865 // FIXME: This is duplicated with the selector. (See:
866 // selectArithExtendedRegister)
867 if (IsSupportedExtend(*ShiftLHS))
868 return (ShiftAmt <= 4) ? 2 : 1;
869
870 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
871 if (Ty.isVector())
872 return 0;
873 unsigned ShiftSize = Ty.getSizeInBits();
874 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
875 (ShiftSize == 64 && ShiftAmt <= 63))
876 return 1;
877 return 0;
878}
879
880/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
881/// instruction \p MI.
882bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
883 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
884 // Swap the operands if it would introduce a profitable folding opportunity.
885 // (e.g. a shift + extend).
886 //
887 // For example:
888 // lsl w13, w11, #1
889 // cmp w13, w12
890 // can be turned into:
891 // cmp w12, w11, lsl #1
892
893 // Don't swap if there's a constant on the RHS, because we know we can fold
894 // that.
895 Register RHS = MI.getOperand(3).getReg();
896 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
897 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
898 return false;
899
900 Register LHS = MI.getOperand(2).getReg();
901 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
902 auto GetRegForProfit = [&](Register Reg) {
903 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
904 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
905 };
906
907 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
908 // compare, would we be able to fold more instructions?
909 Register TheLHS = GetRegForProfit(LHS);
910 Register TheRHS = GetRegForProfit(RHS);
911
912 // If the LHS is more likely to give us a folding opportunity, then swap the
913 // LHS and RHS.
914 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
915 getCmpOperandFoldingProfit(TheRHS, MRI));
916}
917
918void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
919 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
920 Register LHS = MI.getOperand(2).getReg();
921 Register RHS = MI.getOperand(3).getReg();
922 Observer.changedInstr(MI);
923 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
924 MI.getOperand(2).setReg(RHS);
925 MI.getOperand(3).setReg(LHS);
926 Observer.changedInstr(MI);
927}
928
929/// \returns a function which builds a vector floating point compare instruction
930/// for a condition code \p CC.
931/// \param [in] NoNans - True if the target has NoNansFPMath.
932std::function<Register(MachineIRBuilder &)>
933getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
934 MachineRegisterInfo &MRI) {
935 LLT DstTy = MRI.getType(LHS);
936 assert(DstTy.isVector() && "Expected vector types only?");
937 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
938 switch (CC) {
939 default:
940 llvm_unreachable("Unexpected condition code!");
941 case AArch64CC::NE:
942 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
943 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
944 return MIB.buildNot(DstTy, FCmp).getReg(0);
945 };
946 case AArch64CC::EQ:
947 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
948 return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
949 };
950 case AArch64CC::GE:
951 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
952 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
953 };
954 case AArch64CC::GT:
955 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
956 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
957 };
958 case AArch64CC::LS:
959 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
960 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
961 };
962 case AArch64CC::MI:
963 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
964 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
965 };
966 }
967}
968
969/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
970bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
971 MachineIRBuilder &MIB) {
972 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
973 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
974
975 Register Dst = MI.getOperand(0).getReg();
976 LLT DstTy = MRI.getType(Dst);
977 if (!DstTy.isVector() || !ST.hasNEON())
978 return false;
979 Register LHS = MI.getOperand(2).getReg();
980 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
981 if (EltSize == 16 && !ST.hasFullFP16())
982 return false;
983 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
984 return false;
985
986 return true;
987}
988
989/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
990void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
991 MachineIRBuilder &MIB) {
992 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
993 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
994
995 const auto &CmpMI = cast<GFCmp>(MI);
996
997 Register Dst = CmpMI.getReg(0);
998 CmpInst::Predicate Pred = CmpMI.getCond();
999 Register LHS = CmpMI.getLHSReg();
1000 Register RHS = CmpMI.getRHSReg();
1001
1002 LLT DstTy = MRI.getType(Dst);
1003
1004 bool Invert = false;
1005 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1006 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1007 Pred == CmpInst::Predicate::FCMP_UNO) &&
1008 isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
1009 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1010 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1011 // "ord" normally would.
1012 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1013 // thus equivalent to a != a.
1014 RHS = LHS;
1015 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1016 } else
1017 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1018
1019 // Instead of having an apply function, just build here to simplify things.
1020 MIB.setInstrAndDebugLoc(MI);
1021
1022 const bool NoNans =
1023 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1024
1025 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1026 Register CmpRes;
1027 if (CC2 == AArch64CC::AL)
1028 CmpRes = Cmp(MIB);
1029 else {
1030 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1031 auto Cmp2Dst = Cmp2(MIB);
1032 auto Cmp1Dst = Cmp(MIB);
1033 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1034 }
1035 if (Invert)
1036 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1037 MRI.replaceRegWith(Dst, CmpRes);
1038 MI.eraseFromParent();
1039}
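// Illustrative examples of the lowering above: "fcmp oeq %a, %b" on a NEON
// vector becomes G_FCMEQ %a, %b; the canonical NaN checks
// "fcmp ord %a, zeroinitializer" and "fcmp uno %a, zeroinitializer" become
// G_FCMEQ %a, %a and a vector NOT of G_FCMEQ %a, %a, respectively.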
1040
1041// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1042bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1043 auto *GBuildVec = cast<GBuildVector>(&MI);
1044
1045 // Check if the values are all constants
1046 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1047 auto ConstVal =
1048 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1049
1050 if (!ConstVal.has_value())
1051 return true;
1052 }
1053
1054 return false;
1055}
1056
1057void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1058 MachineIRBuilder &B) {
1059 auto *GBuildVec = cast<GBuildVector>(&MI);
1060 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1061 Register DstReg = B.buildUndef(DstTy).getReg(0);
1062
1063 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1064 Register SrcReg = GBuildVec->getSourceReg(I);
1065 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1066 continue;
1067 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1068 DstReg =
1069 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1070 }
1071 B.buildCopy(GBuildVec->getReg(0), DstReg);
1072 GBuildVec->eraseFromParent();
1073}
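// Illustrative example: a <4 x s32> G_BUILD_VECTOR %a, %b, %c, %d with at
// least one non-constant source becomes, roughly, a chain of
// G_INSERT_VECTOR_ELT instructions into a G_IMPLICIT_DEF, one insert per
// non-undef source, using constant s64 lane indices 0..3.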
1074
1075bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1076 Register &SrcReg) {
1077 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1078 Register DstReg = MI.getOperand(0).getReg();
1079 if (MRI.getType(DstReg).isVector())
1080 return false;
1081 // Match a store of a truncate.
1082 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1083 return false;
1084 // Only form truncstores for value types of max 64b.
1085 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1086}
1087
1088void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1089 MachineIRBuilder &B, GISelChangeObserver &Observer,
1090 Register &SrcReg) {
1091 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1092 Observer.changingInstr(MI);
1093 MI.getOperand(0).setReg(SrcReg);
1094 Observer.changedInstr(MI);
1095}
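// Illustrative example: G_STORE (G_TRUNC %x:s64 to s32), %ptr is rewritten to
// store %x directly; the memory operand keeps its s32 size, so the result is
// selected as a truncating store.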
1096
1097// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1098// form in the first place for combine opportunities, so any remaining ones
1099// at this stage need to be lowered back.
1100bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1101 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1102 Register DstReg = MI.getOperand(0).getReg();
1103 LLT DstTy = MRI.getType(DstReg);
1104 return DstTy.isVector();
1105}
1106
1107void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1108 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1109 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1110 B.setInstrAndDebugLoc(MI);
1111 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1112 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1113}
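// Illustrative example: a <4 x s32> G_SEXT_INREG %x, 8 is lowered by the
// LegalizerHelper back into a shift pair, roughly
//   %shl = G_SHL %x, splat(24)
//   %dst = G_ASHR %shl, splat(24)
// which the existing vector shift patterns can then select.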
1114
1115/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1116/// => unused, <N x t> = unmerge v
1117bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1118 Register &MatchInfo) {
1119 auto &Unmerge = cast<GUnmerge>(MI);
1120 if (Unmerge.getNumDefs() != 2)
1121 return false;
1122 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1123 return false;
1124
1125 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1126 if (!DstTy.isVector())
1127 return false;
1128
1129 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1130 if (!Ext)
1131 return false;
1132
1133 Register ExtSrc1 = Ext->getOperand(1).getReg();
1134 Register ExtSrc2 = Ext->getOperand(2).getReg();
1135 auto LowestVal =
1136 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1137 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1138 return false;
1139
1140 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1141 return false;
1142
1143 MatchInfo = ExtSrc1;
1144 return true;
1145}
1146
1147void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1148 MachineIRBuilder &B,
1149 GISelChangeObserver &Observer, Register &SrcReg) {
1150 Observer.changingInstr(MI);
1151 // Swap dst registers.
1152 Register Dst1 = MI.getOperand(0).getReg();
1153 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1154 MI.getOperand(1).setReg(Dst1);
1155 MI.getOperand(2).setReg(SrcReg);
1156 Observer.changedInstr(MI);
1157}
1158
1159// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1160// Match v2s64 mul instructions, which will then be scalarised later on
1161// Doing these two matches in one function to ensure that the order of matching
1162// will always be the same.
1163// Try lowering MUL to MULL before trying to scalarize if needed.
1164bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
1165 // Get the instructions that defined the source operand
1166 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1167 return DstTy == LLT::fixed_vector(2, 64);
1168}
1169
1170void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1171 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1172 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1173 "Expected a G_MUL instruction");
1174
1175 // Get the instructions that defined the source operand
1176 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1177 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1178 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1179 Helper.fewerElementsVector(
1180 MI, 0,
1181 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
1182}
1183
1184class AArch64PostLegalizerLoweringImpl : public Combiner {
1185protected:
1186 const CombinerHelper Helper;
1187 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1188 const AArch64Subtarget &STI;
1189
1190public:
1191 AArch64PostLegalizerLoweringImpl(
1192 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1193 GISelCSEInfo *CSEInfo,
1194 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1195 const AArch64Subtarget &STI);
1196
1197 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1198
1199 bool tryCombineAll(MachineInstr &I) const override;
1200
1201private:
1202#define GET_GICOMBINER_CLASS_MEMBERS
1203#include "AArch64GenPostLegalizeGILowering.inc"
1204#undef GET_GICOMBINER_CLASS_MEMBERS
1205};
1206
1207#define GET_GICOMBINER_IMPL
1208#include "AArch64GenPostLegalizeGILowering.inc"
1209#undef GET_GICOMBINER_IMPL
1210
1211AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1212 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1213 GISelCSEInfo *CSEInfo,
1214 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1215 const AArch64Subtarget &STI)
1216 : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
1217 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1218 STI(STI),
1219#define GET_GICOMBINER_CONSTRUCTOR_INITS
1220#include "AArch64GenPostLegalizeGILowering.inc"
1221#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1222{
1223}
1224
1225class AArch64PostLegalizerLowering : public MachineFunctionPass {
1226public:
1227 static char ID;
1228
1229 AArch64PostLegalizerLowering();
1230
1231 StringRef getPassName() const override {
1232 return "AArch64PostLegalizerLowering";
1233 }
1234
1235 bool runOnMachineFunction(MachineFunction &MF) override;
1236 void getAnalysisUsage(AnalysisUsage &AU) const override;
1237
1238private:
1239 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1240};
1241} // end anonymous namespace
1242
1243void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1244 AU.addRequired<TargetPassConfig>();
1245 AU.setPreservesCFG();
1246 getSelectionDAGFallbackAnalysisUsage(AU);
1247 MachineFunctionPass::getAnalysisUsage(AU);
1248}
1249
1250AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1251 : MachineFunctionPass(ID) {
1252 if (!RuleConfig.parseCommandLineOption())
1253 report_fatal_error("Invalid rule identifier");
1254}
1255
1256bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1257 if (MF.getProperties().hasFailedISel())
1258 return false;
1259 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1260 auto *TPC = &getAnalysis<TargetPassConfig>();
1261 const Function &F = MF.getFunction();
1262
1263 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1264 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1265 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1266 F.hasOptSize(), F.hasMinSize());
1267 // Disable fixed-point iteration to reduce compile-time
1268 CInfo.MaxIterations = 1;
1269 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1270 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1271 CInfo.EnableFullDCE = false;
1272 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1273 RuleConfig, ST);
1274 return Impl.combineMachineInstrs();
1275}
1276
1277char AArch64PostLegalizerLowering::ID = 0;
1278INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1279 "Lower AArch64 MachineInstrs after legalization", false,
1280 false)
1281INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1282INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1283 "Lower AArch64 MachineInstrs after legalization", false,
1284 false)
1285
1286namespace llvm {
1287FunctionPass *createAArch64PostLegalizerLowering() {
1288 return new AArch64PostLegalizerLowering();
1289}
1290} // end namespace llvm