#include "llvm/IR/IntrinsicsAMDGPU.h"
AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}
// Returns true if an fneg can be folded into MI via source modifiers.
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  // ...
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  // ...
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
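// A standalone sketch (not part of this file): fneg folds into these opcodes
// because negating a source operand via a modifier is exact in IEEE
// arithmetic, e.g. -(a * b) == (-a) * b. The demo name is hypothetical.
#include <cassert>

static void fnegFoldsIntoFMulDemo() {
  double A = 1.5, B = -2.25;
  assert(-(A * B) == (-A) * B); // result negation folds into the first source
  assert(-(A * B) == A * (-B)); // or, equivalently, into the second
}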
/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_SELECT:
  // ...
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  // ...
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}
// In allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
//                          unsigned CostThreshold = 4): count the uses that
// may grow the encoding if an fneg is folded into them.
unsigned NumMayIncreaseSize = 0;
// ...
static bool isInv2Pi(const APFloat &APF) {
  // Bitwise-match the 1/(2*pi) inline-immediate bit pattern (the f64 case is
  // shown; KF64 is a reconstructed name).
  // ...
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));
  // ...
  return APF.bitwiseIsEqual(KF64);
}
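// A standalone sketch (not part of this file): 0x3fc45f306dc9c882 is the bit
// pattern the subtarget accepts as its 1/(2*pi) inline immediate (see
// ST.hasInv2PiInlineImm() below); the check assumes only that the constant is
// within rounding distance of 0.5/pi. The demo name is hypothetical.
#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <numbers>

static void inv2PiBitPatternDemo() {
  const double Inv2Pi = std::bit_cast<double>(UINT64_C(0x3fc45f306dc9c882));
  assert(std::fabs(Inv2Pi - 0.5 / std::numbers::pi) < 1e-16); // ~0.1591549
}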
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}
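// A standalone sketch (not part of this file): pushing an fneg through a
// min/max requires flipping the operation, since -min(x, y) == max(-x, -y).
// std::fmin/std::fmax stand in for G_FMINNUM/G_FMAXNUM here.
#include <cassert>
#include <cmath>

static void inverseMinMaxDemo() {
  double X = 3.0, Y = -7.5;
  assert(-std::fmin(X, Y) == std::fmax(-X, -Y));
  assert(-std::fmax(X, Y) == std::fmin(-X, -Y));
}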
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);
  // ...
  if (MRI.hasOneNonDBGUse(Src)) {
    // ...
  }
  // ...
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // ... (reject operands whose constants are costlier to negate)
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      // ...
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // ... (setup elided: the Dst / MatchInfoDst / NegatedMatchInfo registers
  // and a helper that negates an operand, reusing an existing fneg when one
  // is available)
  Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
  // ...
  YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
  // ...

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands of MatchInfo so that its result becomes
  // the negated value:
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    // ... (negate both inputs and swap to the inverseMinMax opcode)
  }
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    // ... (negate the single input)
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      // ...
    case Intrinsic::amdgcn_fmul_legacy:
      // ...
    case Intrinsic::amdgcn_fmed3:
      // ...
    case Intrinsic::amdgcn_fma_legacy:
      // ...
    }
    // ...
  }
  // ...
  }

  // If this fneg was the only user of MatchInfo's old result, the rewritten
  // instruction can define the fneg's destination directly; otherwise emit a
  // compensating fneg so the remaining users still see the original value:
  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // ...
  } else {
    // ...
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}
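// A standalone sketch (not part of this file): the compensating fneg in the
// multi-use path is sound because negation is an exact sign-bit flip, so
// negating the already-negated result reconstitutes the original value.
#include <cassert>

static void doubleFnegDemo() {
  double X = 0.75;
  double Negated = -X;   // plays the role of NegatedMatchInfo
  assert(-Negated == X); // the rebuilt fneg seen by the remaining users
}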
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::float16();
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}
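// A sketch of the LosesInfo round-trip used above (it assumes only the
// APFloat API already referenced in this file): constants that convert to
// IEEE half without losing information qualify. The demo name is
// hypothetical.
#include "llvm/ADT/APFloat.h"
#include <cassert>

static void fitsInF16Demo() {
  bool LosesInfo = true;
  llvm::APFloat Exact(2.0);
  (void)Exact.convert(llvm::APFloat::IEEEhalf(),
                      llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  assert(!LosesInfo); // 2.0 is exactly representable as f16

  llvm::APFloat Inexact(0.1);
  (void)Inexact.convert(llvm::APFloat::IEEEhalf(),
                        llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  assert(LosesInfo); // 0.1 rounds when narrowed to f16
}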
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  // ...
  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}
void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // ... (look through the G_FPEXTs / constants to the f16 sources)
  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}
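// A standalone sketch (not part of this file): the four IEEE min/max ops
// above compute a median, since med3(x, y, z) == min(max(x, y),
// max(min(x, y), z)). With std::fmin/std::fmax as stand-ins:
#include <cassert>
#include <cmath>

static void fmed3ExpansionDemo() {
  double X = 4.0, Y = 1.0, Z = 2.5;
  double A1 = std::fmin(X, Y);      // 1.0, mirrors buildFMinNumIEEE(Src0, Src1)
  double B1 = std::fmax(X, Y);      // 4.0, mirrors buildFMaxNumIEEE(Src0, Src1)
  double C1 = std::fmax(A1, Z);     // 2.5, mirrors buildFMaxNumIEEE(A1, Src2)
  assert(std::fmin(B1, C1) == 2.5); // the median of {4.0, 1.0, 2.5}
}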
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  // ...
  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();
  // ... (elided: bail out on unsupported types; find the select's condition
  // register SelectCondReg and the definitions SelectTrue / SelectFalse of
  // its two arms)
  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants are worth rewriting.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  // Both select values must be exact powers of two.
  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}
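// A standalone sketch (not part of this file): multiplying by a power of two
// only adjusts the exponent, so fmul x, (select c, 2^a, 2^b) can become
// fldexp x, (select c, a, b). std::ldexp models G_FLDEXP; the demo name is
// hypothetical.
#include <cassert>
#include <cmath>

static void fmulSelectToFldexpDemo(bool Cond) {
  double X = 3.25;
  double Mul = X * (Cond ? 8.0 : 0.5);         // select of 2^3 and 2^-1
  double Ldexp = std::ldexp(X, Cond ? 3 : -1); // select of exponents 3 and -1
  assert(Mul == Ldexp);                        // exact for any finite X
}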
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
  std::optional<ValueAndVReg> Res =
      getIConstantVRegValWithLookThrough(Reg, MRI);
  if (!Res)
    return false;

  const uint64_t Val = Res->Value.getZExtValue();
  unsigned MaskIdx = 0;
  unsigned MaskLen = 0;
  if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
    return false;

  // Accept a run of at least 32 contiguous ones anchored at bit 0 or bit 63.
  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
}
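// A standalone sketch (not part of this file) of the predicate's intent,
// using the same isShiftedMask_64 overload. The demo name is hypothetical.
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

static void is32BitMaskDemo() {
  unsigned MaskIdx = 0, MaskLen = 0;
  assert(llvm::isShiftedMask_64(UINT64_C(0x00000000ffffffff), MaskIdx,
                                MaskLen) &&
         MaskLen >= 32 && MaskIdx == 0); // low-half mask: accepted
  assert(llvm::isShiftedMask_64(UINT64_C(0xffffffff00000000), MaskIdx,
                                MaskLen) &&
         MaskLen >= 32 && MaskIdx == 64 - MaskLen); // high-anchored: accepted
  assert(llvm::isShiftedMask_64(UINT64_C(0x0000ffffffff0000), MaskIdx,
                                MaskLen) &&
         MaskIdx != 0 && MaskIdx != 64 - MaskLen); // mid-word run: rejected
}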