#include "llvm/IR/IntrinsicsAMDGPU.h"
AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}
// Returns true if an fneg can be folded into MI via source modifiers.
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  // ...
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  // ...
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
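// A standalone sketch (not part of this file): fneg folds into these opcodes
// because negating a source operand via a modifier is exact in IEEE
// arithmetic, e.g. -(a * b) == (-a) * b. The demo name is hypothetical.
#include <cassert>

static void fnegFoldsIntoFMulDemo() {
  double A = 1.5, B = -2.25;
  assert(-(A * B) == (-A) * B); // result negation folds into the first source
  assert(-(A * B) == A * (-B)); // or, equivalently, into the second
}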
/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_SELECT:
  // ...
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  // ...
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}
// In allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
//                          unsigned CostThreshold = 4): count the uses that
// may grow the encoding if an fneg is folded into them.
unsigned NumMayIncreaseSize = 0;
// ...
static bool isInv2Pi(const APFloat &APF) {
  // Bitwise-match the 1/(2*pi) inline-immediate bit pattern (the f64 case is
  // shown; KF64 is a reconstructed name).
  // ...
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));
  // ...
  return APF.bitwiseIsEqual(KF64);
}
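// A standalone sketch (not part of this file): 0x3fc45f306dc9c882 is the bit
// pattern the subtarget accepts as its 1/(2*pi) inline immediate (see
// ST.hasInv2PiInlineImm() below); the check assumes only that the constant is
// within rounding distance of 0.5/pi. The demo name is hypothetical.
#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <numbers>

static void inv2PiBitPatternDemo() {
  const double Inv2Pi = std::bit_cast<double>(UINT64_C(0x3fc45f306dc9c882));
  assert(std::fabs(Inv2Pi - 0.5 / std::numbers::pi) < 1e-16); // ~0.1591549
}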
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}
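// A standalone sketch (not part of this file): pushing an fneg through a
// min/max requires flipping the operation, since -min(x, y) == max(-x, -y).
// std::fmin/std::fmax stand in for G_FMINNUM/G_FMAXNUM here.
#include <cassert>
#include <cmath>

static void inverseMinMaxDemo() {
  double X = 3.0, Y = -7.5;
  assert(-std::fmin(X, Y) == std::fmax(-X, -Y));
  assert(-std::fmax(X, Y) == std::fmin(-X, -Y));
}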
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);
  // ...
  if (MRI.hasOneNonDBGUse(Src)) {
    // ...
  }
  // ...
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // ... (reject operands whose constants are costlier to negate)
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      // ...
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // ... (setup elided: the Dst / MatchInfoDst / NegatedMatchInfo registers
  // and a helper that negates an operand, reusing an existing fneg when one
  // is available)
  Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
  // ...
  YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
  // ...

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands of MatchInfo so that its result becomes
  // the negated value:
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    // ... (negate both inputs and swap to the inverseMinMax opcode)
  }
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    // ... (negate the single input)
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      // ...
    case Intrinsic::amdgcn_fmul_legacy:
      // ...
    case Intrinsic::amdgcn_fmed3:
      // ...
    case Intrinsic::amdgcn_fma_legacy:
      // ...
    }
    // ...
  }
  // ...
  }

  // If this fneg was the only user of MatchInfo's old result, the rewritten
  // instruction can define the fneg's destination directly; otherwise emit a
  // compensating fneg so the remaining users still see the original value:
  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // ...
  } else {
    // ...
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}
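// A standalone sketch (not part of this file): the compensating fneg in the
// multi-use path is sound because negation is an exact sign-bit flip, so
// negating the already-negated result reconstitutes the original value.
#include <cassert>

static void doubleFnegDemo() {
  double X = 0.75;
  double Negated = -X;   // plays the role of NegatedMatchInfo
  assert(-Negated == X); // the rebuilt fneg seen by the remaining users
}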
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::float16();
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}
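// A sketch of the LosesInfo round-trip used above (it assumes only the
// APFloat API already referenced in this file): constants that convert to
// IEEE half without losing information qualify. The demo name is
// hypothetical.
#include "llvm/ADT/APFloat.h"
#include <cassert>

static void fitsInF16Demo() {
  bool LosesInfo = true;
  llvm::APFloat Exact(2.0);
  (void)Exact.convert(llvm::APFloat::IEEEhalf(),
                      llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  assert(!LosesInfo); // 2.0 is exactly representable as f16

  llvm::APFloat Inexact(0.1);
  (void)Inexact.convert(llvm::APFloat::IEEEhalf(),
                        llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  assert(LosesInfo); // 0.1 rounds when narrowed to f16
}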
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  // ...
  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}
void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // ... (look through the G_FPEXTs / constants to the f16 sources)
  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}
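// A standalone sketch (not part of this file): the four IEEE min/max ops
// above compute a median, since med3(x, y, z) == min(max(x, y),
// max(min(x, y), z)). With std::fmin/std::fmax as stand-ins:
#include <cassert>
#include <cmath>

static void fmed3ExpansionDemo() {
  double X = 4.0, Y = 1.0, Z = 2.5;
  double A1 = std::fmin(X, Y);      // 1.0, mirrors buildFMinNumIEEE(Src0, Src1)
  double B1 = std::fmax(X, Y);      // 4.0, mirrors buildFMaxNumIEEE(Src0, Src1)
  double C1 = std::fmax(A1, Z);     // 2.5, mirrors buildFMaxNumIEEE(A1, Src2)
  assert(std::fmin(B1, C1) == 2.5); // the median of {4.0, 1.0, 2.5}
}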
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  // ...
  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();
  // ... (elided: bail out on unsupported types; find the select's condition
  // register SelectCondReg and the definitions SelectTrue / SelectFalse of
  // its two arms)
  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants are worth rewriting.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  // Both select values must be exact powers of two.
  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}
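// A standalone sketch (not part of this file): multiplying by a power of two
// only adjusts the exponent, so fmul x, (select c, 2^a, 2^b) can become
// fldexp x, (select c, a, b). std::ldexp models G_FLDEXP; the demo name is
// hypothetical.
#include <cassert>
#include <cmath>

static void fmulSelectToFldexpDemo(bool Cond) {
  double X = 3.25;
  double Mul = X * (Cond ? 8.0 : 0.5);         // select of 2^3 and 2^-1
  double Ldexp = std::ldexp(X, Cond ? 3 : -1); // select of exponents 3 and -1
  assert(Mul == Ldexp);                        // exact for any finite X
}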
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
  std::optional<ValueAndVReg> Res =
      getIConstantVRegValWithLookThrough(Reg, MRI);
  if (!Res)
    return false;

  const uint64_t Val = Res->Value.getZExtValue();
  unsigned MaskIdx = 0;
  unsigned MaskLen = 0;
  if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
    return false;

  // Accept a run of at least 32 contiguous ones anchored at bit 0 or bit 63.
  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
}
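// A standalone sketch (not part of this file) of the predicate's intent,
// using the same isShiftedMask_64 overload. The demo name is hypothetical.
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

static void is32BitMaskDemo() {
  unsigned MaskIdx = 0, MaskLen = 0;
  assert(llvm::isShiftedMask_64(UINT64_C(0x00000000ffffffff), MaskIdx,
                                MaskLen) &&
         MaskLen >= 32 && MaskIdx == 0); // low-half mask: accepted
  assert(llvm::isShiftedMask_64(UINT64_C(0xffffffff00000000), MaskIdx,
                                MaskLen) &&
         MaskLen >= 32 && MaskIdx == 64 - MaskLen); // high-anchored: accepted
  assert(llvm::isShiftedMask_64(UINT64_C(0x0000ffffffff0000), MaskIdx,
                                MaskLen) &&
         MaskIdx != 0 && MaskIdx != 64 - MaskLen); // mid-word run: rejected
}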