#define DEBUG_TYPE "riscvtti"

    "riscv-v-register-bit-width-lmul",
    "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
    "by autovectorized code. Fractional LMULs are not supported."),

    "Overrides result used for getMaximumVF query which is used "
    "exclusively by SLP vectorizer."),

    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),
  size_t NumInstr = OpCodes.size();
    return LMULCost * NumInstr;
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (ShAmt == Trailing)
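// getIntImmCostInst: decide whether an immediate operand can be folded into
// the instruction itself (a 12-bit simm for ALU ops, Zbs single-bit ops, or
// the Zbb/Zba zero-extension idioms for And) before falling back to the cost
// of materializing it in a register.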
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;

  if (Imm.getSignificantBits() <= 64 &&
  return ST->hasVInstructions();
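// getPartialReductionCost: only the add-of-i8-multiplies pattern maps onto
// Zvqdot's vqdot[u].vv, so anything else is rejected up front.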
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||

         getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);

  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:

  if (ST->hasVInstructions())
  if (ST->hasVInstructions())
    if (unsigned MinVLen = ST->getRealMinVLen();
        ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
      (ST->hasVInstructions() &&
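// getConstantPoolLoadCost models a constant-pool access as address
// materialization plus a load. The loop that follows checks whether a shuffle
// mask is simply its identity prefix [0, I) repeated across the whole vector.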
RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,

  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)
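// Shuffle costing via vreg splitting: for fixed-length shuffles spanning
// several vector registers, normalize the mask to whole registers and sum the
// per-register single-source and two-source shuffle costs enumerated by the
// callbacks below, deduplicating repeated single-source masks.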
416 "Expected fixed vector type and non-empty mask");
419 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
423 if (NumOfDests <= 1 ||
425 Tp->getElementType()->getPrimitiveSizeInBits() ||
426 LegalNumElts >= Tp->getElementCount().getFixedValue())
429 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
432 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
436 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
437 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
438 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
440 assert(NormalizedVF >= Mask.size() &&
441 "Normalized mask expected to be not shorter than original mask.");
446 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
447 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
450 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
453 Cost +=
TTI.getShuffleCost(
456 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
458 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
459 Cost +=
TTI.getShuffleCost(
462 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
  if (!VLen || Mask.empty())

  LegalVT = TTI.getTypeLegalizationCost(
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||

  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF =
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");

      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
            SingleOpTy, RegMask, CostKind, 0, nullptr);

  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))
  if (!LT.second.isFixedLengthVector())

  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
      return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;

  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)
  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
  if (SrcInfo[1].first == -1)
    return FirstSlideCost;
  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
  return FirstSlideCost + SecondSlideCost + MaskCost;
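// getShuffleCost: the cases below cover, in order, vnsrl-based deinterleaves,
// repeated-subvector slides and inserts, single-register vrgather.vv permutes
// (with an extra gather-index materialization for the two-source form),
// subvector extract/insert via vslide, element insert via vmv.s.x + vmerge,
// splats (including i1 splats), slide-based splices, and vector reverse built
// from vid/vrsub/vrgather.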
614 "Expected the Mask to match the return size if given");
616 "Expected the same scalar types");
625 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
627 *
this, LT.second, ST->getRealVLen(),
629 if (VRegSplittingCost.
isValid())
630 return VRegSplittingCost;
635 if (Mask.size() >= 2) {
636 MVT EltTp = LT.second.getVectorElementType();
647 return 2 * LT.first * TLI->getLMULCost(LT.second);
649 if (Mask[0] == 0 || Mask[0] == 1) {
653 if (
equal(DeinterleaveMask, Mask))
654 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
659 if (LT.second.getScalarSizeInBits() != 1 &&
662 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
664 for (
unsigned I = 0;
I != NumSlides; ++
I) {
665 unsigned InsertIndex = SubVectorSize * (1 <<
I);
670 std::pair<InstructionCost, MVT> DestLT =
675 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
689 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
690 LT.second.getVectorNumElements() <= 256)) {
695 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
709 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
710 LT.second.getVectorNumElements() <= 256)) {
711 auto &
C = SrcTy->getContext();
712 auto EC = SrcTy->getElementCount();
717 return 2 * IndexCost +
718 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
737 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
765 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
766 if (std::optional<unsigned> VLen = ST->getRealVLen();
767 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
768 SubLT.second.getSizeInBits() <= *VLen)
776 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
783 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
795 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
800 Instruction::InsertElement);
801 if (LT.second.getScalarSizeInBits() == 1) {
809 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
822 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
823 RISCV::VMV_X_S, RISCV::VMV_V_X,
832 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
838 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
844 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
845 if (Index >= 0 && Index < 32)
846 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
847 else if (Index < 0 && Index > -32)
848 Opcodes[1] = RISCV::VSLIDEUP_VI;
849 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
853 if (!LT.second.isVector())
859 if (SrcTy->getElementType()->isIntegerTy(1)) {
871 MVT ContainerVT = LT.second;
872 if (LT.second.isFixedLengthVector())
873 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
875 if (ContainerVT.
bitsLE(M1VT)) {
885 if (LT.second.isFixedLengthVector())
887 LenCost =
isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
888 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
889 if (LT.second.isFixedLengthVector() &&
890 isInt<5>(LT.second.getVectorNumElements() - 1))
891 Opcodes[1] = RISCV::VRSUB_VI;
893 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
894 return LT.first * (LenCost + GatherCost);
901 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
903 getRISCVInstructionCost(M1Opcodes, M1VT,
CostKind) + 3;
907 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT,
CostKind) * Ratio;
909 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second,
CostKind);
910 return FixedCost + LT.first * (GatherCost + SlideCost);
        Ty, DemandedElts, Insert, Extract, CostKind);

  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    if (Ty->getScalarSizeInBits() == 1) {
    assert(LT.second.isFixedLengthVector());
    MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
        getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
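// getInterleavedMemoryOpCost: when the target has a legal and optimized
// segment load/store for this factor, the cost is roughly Factor times the
// LMUL of the sub-vector type; without the optimized form it is priced per
// estimated VL element. Fixed-length vectors otherwise fall back to a wide
// memory op plus the interleaving/deinterleaving shuffles.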
    bool UseMaskForCond, bool UseMaskForGaps) const {

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
        if (ST->hasOptimizedSegmentLoadStore(Factor)) {
          MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
          Cost += Factor * TLI->getLMULCost(SubVecVT);
          return LT.first * Cost;
            CostKind, {TTI::OK_AnyValue, TTI::OP_None});
        unsigned NumLoads = getEstimatedVLFor(VTy);
        return NumLoads * MemOpCost;

  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
      Mask.resize(VF * Factor, -1);
      Cost += ShuffleCost;
                                           UseMaskForCond, UseMaskForGaps);

  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
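// Gather/scatter, expand-load/compress-store, and strided accesses below are
// all priced as a per-element memory cost multiplied by the estimated VL,
// except the expand/compress path, which is modelled directly with
// viota/vrgather (expand) or vcompress (compress) instruction costs.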
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

    unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,

  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&
  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});
    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
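// Cost table for vector intrinsics that are expanded into instruction
// sequences: each entry gives the instruction count used when the intrinsic
// is lowered for the listed element type (bit-manipulation sequences for
// bswap/bitreverse/ctpop/ctlz/cttz and the fp rounding idioms).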
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
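// getIntrinsicInstrCost: intrinsics with a direct RVV lowering are priced
// from the opcodes they map to; everything else falls through to the table
// above or to the base implementation.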
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
      unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (!ST->hasVInstructionsBF16Minimal())
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (!ST->hasVInstructionsF16Minimal())
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (SrcEltSz > DstEltSz) {
        Ops = {RISCV::VFNCVT_X_F_W};
      } else if (SrcEltSz < DstEltSz) {
        Ops = {RISCV::VFWCVT_X_F_V};
        Ops = {RISCV::VFCVT_X_F_V};
      if (SrcEltSz > DstEltSz)
        return SrcLT.first *
               getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
      return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    if (ST->hasVInstructions() && LT.second.isVector())
             getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      if (LT.second.getVectorElementType() == MVT::bf16 ||
          (LT.second.getVectorElementType() == MVT::f16 &&
           !ST->hasVInstructionsF16()))
        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
             getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
  case Intrinsic::sqrt: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      MVT ConvType = LT.second;
      MVT FsqrtType = LT.second;
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (LT.second == MVT::nxv32bf16) {
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (LT.second == MVT::nxv32f16) {
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
        FsqrtOp = {RISCV::VFSQRT_V};
      return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                         getRISCVInstructionCost(ConvOp, ConvType, CostKind));
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::abs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
             getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
  case Intrinsic::stepvector: {
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  case Intrinsic::experimental_cttz_elts: {
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
  case Intrinsic::experimental_vp_splat: {
    if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
  case Intrinsic::experimental_vp_splice: {
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (!SrcTy->isVectorTy())
    if (!SrcLT.first.isValid() || !DstLT.first.isValid())

  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
        LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
                                              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;

  if (ST->hasVInstructions() && PtrTy->isVectorTy())
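// getCastInstrCost: casts to/from i1 are expanded via vmv.v.i/vmerge or
// vand+vmsne, integer extends use vsext/vzext by the power-of-two factor, and
// fp converts are built from (possibly chained) vfwcvt/vfncvt steps, with an
// extra widening through f32 when only minimal f16 support is available.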
  if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
      Dst->getScalarSizeInBits() > ST->getELen())

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (Src->getScalarSizeInBits() == 1) {
    return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
           DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
  if (Dst->getScalarSizeInBits() == 1) {
    return SrcLT.first *
               getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},

  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
                        SrcLT.second.getSizeInBits()) ||
                        DstLT.second.getSizeInBits()))

  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");

    int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                  (int)Log2_32(SrcLT.second.getScalarSizeInBits());
    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
  case ISD::FP_EXTEND:
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                      : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
                                                : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;

    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {

    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((DstEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
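// getMinMaxReductionCost: i1 reductions reduce to vcpop-based tests,
// fminimum/fmaximum add NaN handling (vmfne + vcpop) around vfredmin/max, and
// the ordinary min/max cases pay one vred* reduction plus a scalar move, with
// an extra per-register SplitOp when LT.first > 1.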
  if (Ty->getScalarSizeInBits() > ST->getELen())

  if (Ty->getElementType()->isIntegerTy(1)) {
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)

  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
    const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);

  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
                                           std::optional<FastMathFlags> FMF,

  if (Ty->getScalarSizeInBits() > ST->getELen())

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  Type *ElementTy = Ty->getElementType();

    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +

      return ((LT.first > 2) ? (LT.first - 2) : 0) *
                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;

      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};

    if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
        LT.second.getScalarType() == MVT::bf16)
      for (unsigned i = 0; i < LT.first.getValue(); i++)
      return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)

  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);

  return (LT.first - 1) +
  assert(OpInfo.isConstant() && "non constant operand?");

  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(Ty, CostKind);

  EVT VT = TLI->getValueType(DL, Src, true);
  if (VT == MVT::Other)

  if (Opcode == Instruction::Store && OpInfo.isConstant())
  if (Src->isVectorTy() && LT.second.isVector() &&
                          LT.second.getSizeInBits()))
    BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;
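// getCmpSelInstrCost: i1 selects become a three-mask vmandn/vmand/vmor
// sequence, other vector selects a single vmerge (plus a splat and vmsne when
// the condition is scalar), the one/une/ord/uno FCmp forms pay two compares
// plus a mask op, and the cost of materializing constant operands is added on
// top of the instruction cost.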
                                     Op1Info, Op2Info, I);
                                     Op1Info, Op2Info, I);

  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
                                     Op1Info, Op2Info, I);

  auto GetConstantMatCost =
    if (OpInfo.isUniform())
    return getConstantPoolLoadCost(ValTy, CostKind);

    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);

  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (ValTy->getScalarSizeInBits() == 1) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,

    if (ValTy->getScalarSizeInBits() == 1) {
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},

    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},

  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,

  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);

    if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
        (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
        (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
                                       Op1Info, Op2Info, I);

      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);

      ValTy->isIntegerTy() && !I->user_empty()) {
      return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
             U->getType()->isIntegerTy() &&
             !isa<ConstantData>(U->getOperand(1)) &&
             !isa<ConstantData>(U->getOperand(2));
                                     Op1Info, Op2Info, I);

  return Opcode == Instruction::PHI ? 0 : 1;
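// getVectorInstrCost: without vector registers the element access is priced
// as a stack spill/reload of the elements; otherwise the base model is a
// slide to/from lane 0 plus a scalar move, with i1 vectors and out-of-range
// indices (which force a memory round-trip) handled specially.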
                                                 const Value *Op1) const {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)

  if (!LT.second.isVector()) {
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;

  if (LT.second.isScalableVector() && !LT.first.isValid())

    if (Opcode == Instruction::ExtractElement) {
      return ExtendCost + ExtractCost;
    return ExtendCost + InsertCost + TruncCost;

  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

  if (auto VLEN = ST->getRealVLen()) {
    unsigned EltSize = LT.second.getScalarSizeInBits();
    unsigned M1Max = *VLEN / EltSize;
    Index = Index % M1Max;
  else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
  else if (Opcode == Instruction::InsertElement)

      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    Align VecAlign = DL.getPrefTypeAlign(Val);
    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
    if (Opcode == Instruction::ExtractElement)

    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
                                                unsigned Index) const {
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
                             EC.getKnownMinValue() - 1 - Index, nullptr,
  if (!LT.second.isVector())

  unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
      TLI->getOperationAction(ISDOpcode, LT.second) ==
    MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;

  auto getConstantMatCost =
    return getConstantPoolLoadCost(Ty, CostKind);
    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;
    return CastCost + ConstantMatCost +

  if (Ty->isFPOrFPVectorTy())
  return CastCost + ConstantMatCost + LT.first * InstrCost;
    if (Info.isSameBase() && V != Base) {
      if (GEP->hasAllConstantIndices())
      unsigned Stride = DL.getTypeStoreSize(AccessTy);
      if (Info.isUnitStride() &&
              GEP->getType()->getPointerAddressSpace()))
              {TTI::OK_AnyValue, TTI::OP_None},
              {TTI::OK_AnyValue, TTI::OP_None}, {});
  if (ST->enableDefaultUnroll())

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)
  if (L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && I.getType()->isVectorTy())
  if (Ty->isVectorTy()) {
    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||

    if (Size.isScalable() && ST->hasVInstructions())
    if (ST->useRVVForFixedLengthVectors())

  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
  return ST->enableUnalignedVectorMem();

  if (ST->hasVendorXCVmem() && !ST->is64Bit())

                                                 Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
      return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
             ST->getMaxLMULForFixedLengthVectors();

                                                    Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)
  for (const User *U : I.users()) {
    Considerable = true;
    if (GEPInst->getNumOperands() > 2) {
      AllowPromotionWithoutCommonHeader = true;
  return Considerable;
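// canSplatOperand: returns true when operand Operand of the given opcode (or
// intrinsic, below) can be folded into a .vx/.vf/.vi form, so the splat that
// feeds it does not need to be materialized as a full vector.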
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;
  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())

    switch (II->getIntrinsicID()) {
    case Intrinsic::fma:
    case Intrinsic::vp_fma:
    case Intrinsic::fmuladd:
    case Intrinsic::vp_fmuladd:
      return Operand == 0 || Operand == 1;
    case Intrinsic::vp_shl:
    case Intrinsic::vp_lshr:
    case Intrinsic::vp_ashr:
    case Intrinsic::vp_udiv:
    case Intrinsic::vp_sdiv:
    case Intrinsic::vp_urem:
    case Intrinsic::vp_srem:
    case Intrinsic::ssub_sat:
    case Intrinsic::vp_ssub_sat:
    case Intrinsic::usub_sat:
    case Intrinsic::vp_usub_sat:
    case Intrinsic::vp_select:
      return Operand == 1;
    case Intrinsic::vp_add:
    case Intrinsic::vp_mul:
    case Intrinsic::vp_and:
    case Intrinsic::vp_or:
    case Intrinsic::vp_xor:
    case Intrinsic::vp_fadd:
    case Intrinsic::vp_fmul:
    case Intrinsic::vp_icmp:
    case Intrinsic::vp_fcmp:
    case Intrinsic::smin:
    case Intrinsic::vp_smin:
    case Intrinsic::umin:
    case Intrinsic::vp_umin:
    case Intrinsic::smax:
    case Intrinsic::vp_smax:
    case Intrinsic::umax:
    case Intrinsic::vp_umax:
    case Intrinsic::sadd_sat:
    case Intrinsic::vp_sadd_sat:
    case Intrinsic::uadd_sat:
    case Intrinsic::vp_uadd_sat:
    case Intrinsic::vp_sub:
    case Intrinsic::vp_fsub:
    case Intrinsic::vp_fdiv:
      return Operand == 0 || Operand == 1;
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {
    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {
        Ops.push_back(&Not);
        Ops.push_back(&InsertElt);

  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  if (!ST->sinkSplatOperands())

    for (Use &U : Op->uses()) {
      Ops.push_back(&Op->getOperandUse(0));
      Use *InsertEltUse = &Op->getOperandUse(0);
      Ops.push_back(&InsertElt->getOperandUse(1));
      Ops.push_back(InsertEltUse);
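// enableMemCmpExpansion: allow overlapping scalar loads sized by XLEN (Zbb or
// Zbkb required for the non-zero-compare case), and for memcmp-with-zero
// additionally offer fixed vector sizes from XLEN/8 + 1 bytes up to
// VLEN/8 times the maximum fixed-length LMUL.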
  if (!ST->enableUnalignedScalarMem())
  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  if (ST->is64Bit()) {
    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};
    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};

  if (IsZeroCmp && ST->hasVInstructions()) {
    unsigned VLenB = ST->getRealMinVLen() / 8;
    unsigned MinSize = ST->getXLen() / 8 + 1;
    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
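A small sketch of how these two ShuffleVectorInst helpers classify masks; the mask values and the function name are illustrative.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void classifyMasksSketch() {
  int Identity[] = {0, 1, 2, 3};
  bool IsIdentity =
      ShuffleVectorInst::isIdentityMask(Identity, /*NumSrcElts=*/4); // true

  // Factor-2 interleave of two 4-element inputs (8 input elements total).
  int Interleave[] = {0, 4, 1, 5, 2, 6, 3, 7};
  SmallVector<unsigned> StartIndexes; // expected start indices: {0, 4}
  bool IsInterleave = ShuffleVectorInst::isInterleaveMask(
      Interleave, /*Factor=*/2, /*NumInputElts=*/8, StartIndexes); // true
  (void)IsIdentity;
  (void)IsInterleave;
}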
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
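For reference, a minimal SmallVector usage sketch with the two members listed above; the values are illustrative.
#include "llvm/ADT/SmallVector.h"
#include <iterator>
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void smallVectorSketch() {
  SmallVector<int, 16> Mask; // inline storage for 16 elements, heap beyond that
  Mask.push_back(0);
  Mask.push_back(1);
  int Tail[] = {2, 3};
  Mask.append(std::begin(Tail), std::end(Tail)); // Mask == {0, 1, 2, 3}
}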
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
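A minimal sketch contrasting the two TypeSize factories; the sizes are illustrative.
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void typeSizeSketch() {
  TypeSize Fixed = TypeSize::getFixed(128);       // exactly 128 bits
  TypeSize Scalable = TypeSize::getScalable(128); // 128 * vscale bits
  (void)Fixed.getFixedValue();       // 128
  (void)Scalable.getKnownMinValue(); // 128; the runtime size is a multiple of this
}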
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
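A minimal sketch building fixed and scalable vector types with this factory; the element counts are illustrative.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void vectorTypeSketch(LLVMContext &Ctx) {
  Type *I64 = Type::getInt64Ty(Ctx);
  VectorType *Fixed =
      VectorType::get(I64, ElementCount::getFixed(4)); // <4 x i64>
  VectorType *Scalable =
      VectorType::get(I64, ElementCount::getScalable(2)); // <vscale x 2 x i64>
  (void)Fixed;
  (void)Scalable;
}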
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value passed in.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
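A minimal sketch of these FixedOrScalableQuantity operations applied to an ElementCount; the counts are illustrative.
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void elementCountSketch() {
  ElementCount EC = ElementCount::getScalable(8); // vscale x 8 lanes
  bool Multiple = EC.isKnownMultipleOf(4);        // true: 8 % 4 == 0
  ElementCount Half = EC.divideCoefficientBy(2);  // vscale x 4 lanes
  bool Less = ElementCount::isKnownLT(Half, EC);  // true
  (void)Multiple;
  (void)Less;
}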
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
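A small sketch of the IR-level matchers listed above, recognizing the common insertelement-plus-shufflevector splat idiom; matchSplatSketch is a hypothetical helper and the shuffle mask itself is not constrained here.
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Illustrative sketch; not part of the documented file. Binds X to the
// scalar inserted at lane 0 when V is a shuffle of such an insertelement.
bool matchSplatSketch(Value *V, Value *&X) {
  return match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(X), m_ZeroInt()),
                            m_Undef()));
}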
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
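A minimal sketch of these range checks, using the 12-bit signed immediate width common on RISC-V as the example (together with isUInt, listed further below); the values are illustrative.
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void immRangeSketch() {
  bool A = isInt<12>(2047);  // true:  fits in a signed 12-bit field
  bool B = isInt<12>(2048);  // false: one past the signed range
  bool C = isUInt<12>(4095); // true:  fits in an unsigned 12-bit field
  (void)A;
  (void)B;
  (void)C;
}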
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence and B, C, ... are the values from the original input ranges.
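A minimal sketch of enumerate applied to a shuffle mask; the mask and the helper name are illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
bool isIdentitySketch() {
  SmallVector<int, 4> Mask = {0, 1, 2, 3};
  for (auto [Idx, Elt] : enumerate(Mask))
    if (Elt != (int)Idx) // identity mask: element I selects lane I
      return false;
  return true;
}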
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
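A minimal sketch of decomposing such a mask into its shift amount and run length with countr_zero and popcount (both from the bit-manipulation helpers); the mask value is illustrative.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void shiftedMaskSketch() {
  uint64_t Mask = 0x0000000000FF0000ULL; // ones in bits [16, 23]
  if (isShiftedMask_64(Mask)) {
    int Trailing = countr_zero(Mask); // 16: position of the lowest set bit
    int RunLength = popcount(Mask);   // 8: length of the run of ones
    (void)Trailing;
    (void)RunLength;
  }
}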
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
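A minimal sketch exercising both all_of (listed above) and any_of over a mask; the values are illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void rangePredicateSketch() {
  SmallVector<int, 8> Mask = {0, 1, -1, 3};
  bool AllDefined = all_of(Mask, [](int M) { return M >= 0; }); // false
  bool AnyUndef = any_of(Mask, [](int M) { return M < 0; });    // true
  (void)AllDefined;
  (void)AnyUndef;
}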
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
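A minimal sketch with illustrative parameters: selecting every other lane of a pair of 4-element vectors.
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void strideMaskSketch() {
  SmallVector<int, 16> Even =
      createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4); // {0, 2, 4, 6}
  SmallVector<int, 16> Odd =
      createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4); // {1, 3, 5, 7}
  (void)Even;
  (void)Odd;
}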
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
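A minimal sketch of the logarithm and power-of-two helpers above; the inputs are illustrative.
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void log2Sketch() {
  unsigned Floor = Log2_32(40);     // 5
  unsigned Ceil = Log2_32_Ceil(40); // 6
  bool Pow2 = isPowerOf2_32(64);    // true
  (void)Floor;
  (void)Ceil;
  (void)Pow2;
}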
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
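A minimal sketch with illustrative numbers: covering 70 elements with 32-element registers.
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void divideCeilSketch() {
  unsigned NumRegs = divideCeil(70u, 32u); // 3
  (void)NumRegs;
}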
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles, combined with a select?
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
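A minimal sketch with illustrative parameters: interleaving two 4-element vectors.
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void interleaveMaskSketch() {
  SmallVector<int, 16> M =
      createInterleaveMask(/*VF=*/4, /*NumVecs=*/2); // {0, 4, 1, 5, 2, 6, 3, 7}
  (void)M;
}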
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
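A minimal sketch contrasting isa, cast, and dyn_cast on a Value; the helper name is illustrative.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Illustrative sketch; not part of the documented file.
void castingSketch(Value *V) {
  if (isa<StoreInst>(V)) {              // query only
    StoreInst *SI = cast<StoreInst>(V); // asserts if V is not a StoreInst
    (void)SI;
  }
  if (auto *SI = dyn_cast<StoreInst>(V)) // returns nullptr on mismatch
    (void)SI->getPointerOperand();
}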
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.