79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
307 for (
iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB =
nullptr;
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src =
nullptr;
381 MachineBasicBlock *Dest =
nullptr;
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
392 MachineFunction *F =
nullptr;
395 const MachineBranchProbabilityInfo *MBPI =
nullptr;
398 std::unique_ptr<MBFIWrapper> MBFI;
401 MachineLoopInfo *MLI =
nullptr;
406 MachineBasicBlock *PreferredLoopExit =
nullptr;
409 const TargetInstrInfo *TII =
nullptr;
412 const TargetLoweringBase *TLI =
nullptr;
415 MachinePostDominatorTree *MPDT =
nullptr;
417 ProfileSummaryInfo *PSI =
nullptr;
430 TailDuplicator TailDup;
433 BlockFrequency DupThreshold;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
448 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
468 BlockFrequency getBlockCountOrFrequency(
const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*
Count);
474 return BlockFrequency(0);
476 return MBFI->getBlockFreq(BB);
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter =
nullptr);
491 void markBlockSuccessors(
const BlockChain &Chain,
const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter =
nullptr);
496 collectViableSuccessors(
const MachineBasicBlock *BB,
const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(
const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
542 void fillWorkLists(
const MachineBasicBlock *
MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
546 void buildChain(
const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter =
nullptr);
548 bool canMoveBottomBlockToTop(
const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(
const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(
const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(
const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(
const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(
const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
567 void buildLoopChains(
const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
580 bool isProfitableToTailDup(
const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb,
const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
586 bool isTrellis(
const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
604 bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
623 MachineBlockPlacement(
const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr<MBFIWrapper> MBFI,
626 MachinePostDominatorTree *MPDT,
bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
630 bool run(MachineFunction &F);
632 static bool allowTailDupPlacement(MachineFunction &MF) {
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
645 bool runOnMachineFunction(MachineFunction &MF)
override {
650 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
651 auto MBFI = std::make_unique<MBFIWrapper>(
652 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
653 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
654 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
655 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
658 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
659 auto *PassConfig = &getAnalysis<TargetPassConfig>();
660 bool AllowTailMerge = PassConfig->getEnableTailMerge();
661 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
666 void getAnalysisUsage(AnalysisUsage &AU)
const override {
667 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
668 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
670 AU.
addRequired<MachinePostDominatorTreeWrapperPass>();
680char MachineBlockPlacementLegacy::ID = 0;
685 "Branch Probability Basic Block Placement",
false,
false)
702 OS <<
" ('" << BB->
getName() <<
"')";
714void MachineBlockPlacement::markChainSuccessors(
716 const BlockFilterSet *BlockFilter) {
719 for (MachineBasicBlock *
MBB : Chain) {
720 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
730void MachineBlockPlacement::markBlockSuccessors(
731 const BlockChain &Chain,
const MachineBasicBlock *
MBB,
732 const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
738 if (BlockFilter && !BlockFilter->count(Succ))
740 BlockChain &SuccChain = *BlockToChain[Succ];
742 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
747 if (SuccChain.UnscheduledPredecessors == 0 ||
748 --SuccChain.UnscheduledPredecessors > 0)
751 auto *NewBB = *SuccChain.begin();
752 if (NewBB->isEHPad())
763BranchProbability MachineBlockPlacement::collectViableSuccessors(
764 const MachineBasicBlock *BB,
const BlockChain &Chain,
765 const BlockFilterSet *BlockFilter,
766 SmallVector<MachineBasicBlock *, 4> &Successors) {
784 for (MachineBasicBlock *Succ : BB->
successors()) {
785 bool SkipSucc =
false;
786 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
789 BlockChain *SuccChain = BlockToChain[Succ];
790 if (SuccChain == &Chain) {
792 }
else if (Succ != *SuccChain->begin()) {
794 <<
" -> Mid chain!\n");
804 return AdjustedSumProb;
809static BranchProbability
815 if (SuccProbN >= SuccProbD)
830 if (Successors.
count(&BB))
833 if (!Successors.
count(Succ))
841bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
862 return (Gain / ThresholdProb) >= EntryFreq;
870bool MachineBlockPlacement::isProfitableToTailDup(
871 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
872 BranchProbability QProb,
const BlockChain &Chain,
873 const BlockFilterSet *BlockFilter) {
897 MachineBasicBlock *PDom =
nullptr;
898 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
900 auto AdjustedSuccSumProb =
901 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
903 auto BBFreq = MBFI->getBlockFreq(BB);
904 auto SuccFreq = MBFI->getBlockFreq(Succ);
905 BlockFrequency
P =
BBFreq * PProb;
906 BlockFrequency Qout =
BBFreq * QProb;
907 BlockFrequency EntryFreq = MBFI->getEntryFreq();
910 if (SuccSuccs.
size() == 0)
915 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
917 if (Prob > BestSuccSucc)
927 auto SuccBestPred = BlockFrequency(0);
928 for (MachineBasicBlock *SuccPred : Succ->
predecessors()) {
929 if (SuccPred == Succ || SuccPred == BB ||
930 BlockToChain[SuccPred] == &Chain ||
931 (BlockFilter && !BlockFilter->count(SuccPred)))
935 if (Freq > SuccBestPred)
939 BlockFrequency Qin = SuccBestPred;
960 BranchProbability UProb = BestSuccSucc;
961 BranchProbability VProb = AdjustedSuccSumProb - UProb;
962 BlockFrequency
F = SuccFreq - Qin;
963 BlockFrequency
V = SuccFreq * VProb;
964 BlockFrequency QinU = std::min(Qin,
F) * UProb;
965 BlockFrequency BaseCost =
P +
V;
966 BlockFrequency DupCost = Qout + QinU + std::max(Qin,
F) * VProb;
970 BranchProbability VProb = AdjustedSuccSumProb - UProb;
971 BlockFrequency
U = SuccFreq * UProb;
972 BlockFrequency
V = SuccFreq * VProb;
973 BlockFrequency
F = SuccFreq - Qin;
1003 if (UProb > AdjustedSuccSumProb / 2 &&
1004 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1005 Chain, BlockFilter))
1008 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1012 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1013 std::max(Qin,
F) * UProb),
1024bool MachineBlockPlacement::isTrellis(
1025 const MachineBasicBlock *BB,
1026 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1027 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1036 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1038 for (MachineBasicBlock *Succ : ViableSuccs) {
1047 if (Successors.count(SuccPred)) {
1049 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1050 if (!Successors.count(CheckSucc))
1054 const BlockChain *PredChain = BlockToChain[SuccPred];
1055 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1056 PredChain == &Chain || PredChain == BlockToChain[Succ])
1060 if (!SeenPreds.
insert(SuccPred).second)
1078std::pair<MachineBlockPlacement::WeightedEdge,
1079 MachineBlockPlacement::WeightedEdge>
1080MachineBlockPlacement::getBestNonConflictingEdges(
1081 const MachineBasicBlock *BB,
1091 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1095 auto BestA = Edges[0].begin();
1096 auto BestB = Edges[1].begin();
1099 if (BestA->Src == BestB->Src) {
1101 auto SecondBestA = std::next(BestA);
1102 auto SecondBestB = std::next(BestB);
1103 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1104 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1105 if (BestAScore < BestBScore)
1106 BestA = SecondBestA;
1108 BestB = SecondBestB;
1111 if (BestB->Src == BB)
1113 return std::make_pair(*BestA, *BestB);
1123MachineBlockPlacement::BlockAndTailDupResult
1124MachineBlockPlacement::getBestTrellisSuccessor(
1125 const MachineBasicBlock *BB,
1126 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1127 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
1128 const BlockFilterSet *BlockFilter) {
1130 BlockAndTailDupResult
Result = {
nullptr,
false};
1137 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1143 for (
auto *Succ : ViableSuccs) {
1144 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1146 if (SuccPred != BB) {
1147 if (BlockFilter && !BlockFilter->count(SuccPred))
1149 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1150 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1153 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1155 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1161 WeightedEdge BestA, BestB;
1162 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1164 if (BestA.Src != BB) {
1168 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1175 if (BestA.Dest == BestB.Src) {
1178 MachineBasicBlock *Succ1 = BestA.Dest;
1179 MachineBasicBlock *Succ2 = BestB.Dest;
1181 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1182 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1184 Chain, BlockFilter)) {
1188 <<
", probability: " << Succ2Prob
1189 <<
" (Tail Duplicate)\n");
1191 Result.ShouldTailDup =
true;
1197 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1199 auto TrellisSucc = BestA.Dest;
1203 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1211bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1212 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1213 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1214 if (!shouldTailDuplicate(Succ))
1218 bool Duplicate =
true;
1220 unsigned int NumDup = 0;
1229 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1230 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1280 if (
F->getFunction().hasProfileData())
1311 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1334void MachineBlockPlacement::precomputeTriangleChains() {
1335 struct TriangleChain {
1336 std::vector<MachineBasicBlock *> Edges;
1338 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1339 : Edges({src, dst}) {}
1341 void append(MachineBasicBlock *dst) {
1342 assert(getKey()->isSuccessor(dst) &&
1343 "Attempting to append a block that is not a successor.");
1344 Edges.push_back(dst);
1347 unsigned count()
const {
return Edges.size() - 1; }
1349 MachineBasicBlock *getKey()
const {
return Edges.back(); }
1358 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1359 for (MachineBasicBlock &BB : *
F) {
1363 MachineBasicBlock *PDom =
nullptr;
1364 for (MachineBasicBlock *Succ : BB.
successors()) {
1372 if (PDom ==
nullptr)
1379 if (!shouldTailDuplicate(PDom))
1381 bool CanTailDuplicate =
true;
1388 CanTailDuplicate =
false;
1394 if (!CanTailDuplicate)
1401 auto Found = TriangleChainMap.
find(&BB);
1404 if (Found != TriangleChainMap.
end()) {
1405 TriangleChain Chain = std::move(Found->second);
1406 TriangleChainMap.
erase(Found);
1408 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1410 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1411 assert(InsertResult.second &&
"Block seen twice.");
1419 for (
auto &ChainPair : TriangleChainMap) {
1420 TriangleChain &Chain = ChainPair.second;
1426 MachineBasicBlock *dst = Chain.Edges.back();
1427 Chain.Edges.pop_back();
1428 for (MachineBasicBlock *src :
reverse(Chain.Edges)) {
1431 <<
" as pre-computed based on triangles.\n");
1433 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1434 assert(InsertResult.second &&
"Block seen twice.");
1444static BranchProbability
1476bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1477 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
1478 const BlockChain &SuccChain, BranchProbability SuccProb,
1479 BranchProbability RealSuccProb,
const BlockChain &Chain,
1480 const BlockFilterSet *BlockFilter) {
1483 if (SuccChain.UnscheduledPredecessors == 0)
1608 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1609 bool BadCFGConflict =
false;
1612 BlockChain *PredChain = BlockToChain[Pred];
1613 if (Pred == Succ || PredChain == &SuccChain ||
1614 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1615 Pred != *std::prev(PredChain->end()) ||
1634 BlockFrequency PredEdgeFreq =
1636 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1637 BadCFGConflict =
true;
1642 if (BadCFGConflict) {
1644 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1661MachineBlockPlacement::BlockAndTailDupResult
1662MachineBlockPlacement::selectBestSuccessor(
const MachineBasicBlock *BB,
1663 const BlockChain &Chain,
1664 const BlockFilterSet *BlockFilter) {
1667 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1670 SmallVector<MachineBasicBlock *, 4> Successors;
1671 auto AdjustedSumProb =
1672 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1679 auto FoundEdge = ComputedEdges.
find(BB);
1680 if (FoundEdge != ComputedEdges.
end()) {
1681 MachineBasicBlock *Succ = FoundEdge->second.BB;
1682 ComputedEdges.
erase(FoundEdge);
1683 BlockChain *SuccChain = BlockToChain[Succ];
1684 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1685 SuccChain != &Chain && Succ == *SuccChain->begin())
1686 return FoundEdge->second;
1691 if (isTrellis(BB, Successors, Chain, BlockFilter))
1692 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1700 for (MachineBasicBlock *Succ : Successors) {
1702 BranchProbability SuccProb =
1705 BlockChain &SuccChain = *BlockToChain[Succ];
1708 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1709 Chain, BlockFilter)) {
1711 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1718 <<
", probability: " << SuccProb
1719 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1722 if (BestSucc.BB && BestProb >= SuccProb) {
1729 BestProb = SuccProb;
1737 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1738 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1739 return std::get<0>(L) > std::get<0>(R);
1741 for (
auto &Tup : DupCandidates) {
1742 BranchProbability DupProb;
1743 MachineBasicBlock *Succ;
1744 std::tie(DupProb, Succ) = Tup;
1745 if (DupProb < BestProb)
1747 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1748 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1750 <<
", probability: " << DupProb
1751 <<
" (Tail Duplicate)\n");
1753 BestSucc.ShouldTailDup =
true;
1774MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1775 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1781 return BlockToChain.
lookup(BB) == &Chain;
1784 if (WorkList.
empty())
1787 bool IsEHPad = WorkList[0]->isEHPad();
1789 MachineBasicBlock *BestBlock =
nullptr;
1790 BlockFrequency BestFreq;
1791 for (MachineBasicBlock *
MBB : WorkList) {
1793 "EHPad mismatch between block and work list.");
1795 BlockChain &SuccChain = *BlockToChain[
MBB];
1796 if (&SuccChain == &Chain)
1799 assert(SuccChain.UnscheduledPredecessors == 0 &&
1800 "Found CFG-violating block");
1802 BlockFrequency CandidateFreq = MBFI->getBlockFreq(
MBB);
1825 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1829 BestFreq = CandidateFreq;
1842MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1843 const BlockChain &PlacedChain,
1848 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1849 PrevUnplacedBlockIt =
I;
1853 return *Chain->begin();
1869MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1870 const BlockChain &PlacedChain,
1871 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1872 const BlockFilterSet *BlockFilter) {
1874 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1875 ++PrevUnplacedBlockInFilterIt) {
1876 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1877 if (
C != &PlacedChain) {
1884void MachineBlockPlacement::fillWorkLists(
1885 const MachineBasicBlock *
MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1886 const BlockFilterSet *BlockFilter =
nullptr) {
1887 BlockChain &Chain = *BlockToChain[
MBB];
1888 if (!UpdatedPreds.
insert(&Chain).second)
1892 Chain.UnscheduledPredecessors == 0 &&
1893 "Attempting to place block with unscheduled predecessors in worklist.");
1894 for (MachineBasicBlock *ChainBB : Chain) {
1895 assert(BlockToChain[ChainBB] == &Chain &&
1896 "Block in chain doesn't match BlockToChain map.");
1897 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1898 if (BlockFilter && !BlockFilter->count(Pred))
1900 if (BlockToChain[Pred] == &Chain)
1902 ++Chain.UnscheduledPredecessors;
1906 if (Chain.UnscheduledPredecessors != 0)
1909 MachineBasicBlock *BB = *Chain.
begin();
1916void MachineBlockPlacement::buildChain(
const MachineBasicBlock *HeadBB,
1918 BlockFilterSet *BlockFilter) {
1919 assert(HeadBB &&
"BB must not be null.\n");
1920 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1922 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1924 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1926 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1927 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1928 MachineBasicBlock *BB = *std::prev(Chain.end());
1930 assert(BB &&
"null block found at end of chain in loop.");
1931 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1932 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1936 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1937 MachineBasicBlock *BestSucc =
Result.BB;
1938 bool ShouldTailDup =
Result.ShouldTailDup;
1939 if (allowTailDupPlacement(*
F))
1940 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1941 BB, BestSucc, Chain, BlockFilter));
1947 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1949 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1953 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1956 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1960 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1961 "layout successor until the CFG reduces\n");
1966 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1967 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1968 BlockFilter, PrevUnplacedBlockIt,
1969 PrevUnplacedBlockInFilterIt);
1977 BlockChain &SuccChain = *BlockToChain[BestSucc];
1980 SuccChain.UnscheduledPredecessors = 0;
1983 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1984 Chain.merge(BestSucc, &SuccChain);
1985 BB = *std::prev(Chain.end());
2006bool MachineBlockPlacement::canMoveBottomBlockToTop(
2007 const MachineBasicBlock *BottomBlock,
const MachineBasicBlock *OldTop) {
2010 MachineBasicBlock *Pred = *BottomBlock->
pred_begin();
2014 MachineBasicBlock *OtherBB = *Pred->
succ_begin();
2015 if (OtherBB == BottomBlock)
2017 if (OtherBB == OldTop)
2025MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
2026 const BlockFilterSet &LoopBlockSet) {
2027 BlockFrequency MaxFreq = BlockFrequency(0);
2029 BlockChain *PredChain = BlockToChain[Pred];
2030 if (!LoopBlockSet.count(Pred) &&
2031 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2036 for (MachineBasicBlock *Succ : Pred->
successors()) {
2038 BlockChain *SuccChain = BlockToChain[Succ];
2041 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2042 (!SuccChain || Succ == *SuccChain->begin())) {
2048 BlockFrequency EdgeFreq =
2050 if (EdgeFreq > MaxFreq)
2079BlockFrequency MachineBlockPlacement::FallThroughGains(
2080 const MachineBasicBlock *NewTop,
const MachineBasicBlock *OldTop,
2081 const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
2082 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2083 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2087 BlockFrequency BackEdgeFreq =
2091 MachineBasicBlock *BestPred =
nullptr;
2092 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2093 for (MachineBasicBlock *Pred : NewTop->
predecessors()) {
2094 if (!LoopBlockSet.count(Pred))
2096 BlockChain *PredChain = BlockToChain[Pred];
2097 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2098 BlockFrequency EdgeFreq =
2100 if (EdgeFreq > FallThroughFromPred) {
2101 FallThroughFromPred = EdgeFreq;
2109 BlockFrequency NewFreq = BlockFrequency(0);
2111 for (MachineBasicBlock *Succ : BestPred->
successors()) {
2112 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2116 BlockChain *SuccChain = BlockToChain[Succ];
2117 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2118 (SuccChain == BlockToChain[BestPred]))
2120 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2122 if (EdgeFreq > NewFreq)
2125 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2127 if (NewFreq > OrigEdgeFreq) {
2131 NewFreq = BlockFrequency(0);
2132 FallThroughFromPred = BlockFrequency(0);
2136 BlockFrequency
Result = BlockFrequency(0);
2137 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2138 BlockFrequency Lost =
2139 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2167MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2168 MachineBasicBlock *OldTop,
const MachineLoop &L,
2169 const BlockFilterSet &LoopBlockSet) {
2173 BlockChain &HeaderChain = *BlockToChain[OldTop];
2174 if (!LoopBlockSet.count(*HeaderChain.begin()))
2176 if (OldTop != *HeaderChain.begin())
2182 BlockFrequency BestGains = BlockFrequency(0);
2183 MachineBasicBlock *BestPred =
nullptr;
2184 for (MachineBasicBlock *Pred : OldTop->
predecessors()) {
2185 if (!LoopBlockSet.count(Pred))
2187 if (Pred ==
L.getHeader())
2195 MachineBasicBlock *OtherBB =
nullptr;
2198 if (OtherBB == OldTop)
2202 if (!canMoveBottomBlockToTop(Pred, OldTop))
2205 BlockFrequency Gains =
2206 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2207 if ((Gains > BlockFrequency(0)) &&
2208 (Gains > BestGains ||
2223 (*BestPred->
pred_begin())->succ_size() == 1 &&
2236MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2237 const BlockFilterSet &LoopBlockSet) {
2246 return L.getHeader();
2248 MachineBasicBlock *OldTop =
nullptr;
2249 MachineBasicBlock *NewTop =
L.getHeader();
2250 while (NewTop != OldTop) {
2252 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2253 if (NewTop != OldTop)
2254 ComputedEdges[NewTop] = {OldTop,
false};
2265MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2266 const BlockFilterSet &LoopBlockSet,
2267 BlockFrequency &ExitFreq) {
2276 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2277 if (!LoopBlockSet.count(*HeaderChain.begin()))
2280 BlockFrequency BestExitEdgeFreq;
2281 unsigned BestExitLoopDepth = 0;
2282 MachineBasicBlock *ExitingBB =
nullptr;
2286 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2290 for (MachineBasicBlock *
MBB :
L.getBlocks()) {
2291 BlockChain &Chain = *BlockToChain[
MBB];
2294 if (
MBB != *std::prev(Chain.end()))
2301 MachineBasicBlock *OldExitingBB = ExitingBB;
2302 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2303 bool HasLoopingSucc =
false;
2309 BlockChain &SuccChain = *BlockToChain[Succ];
2311 if (&Chain == &SuccChain) {
2318 if (LoopBlockSet.count(Succ)) {
2321 HasLoopingSucc =
true;
2325 unsigned SuccLoopDepth = 0;
2326 if (MachineLoop *ExitLoop = MLI->
getLoopFor(Succ)) {
2327 SuccLoopDepth = ExitLoop->getLoopDepth();
2328 if (ExitLoop->contains(&L))
2332 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(
MBB) * SuccProb;
2335 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2342 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2343 ExitEdgeFreq > BestExitEdgeFreq ||
2345 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2346 BestExitEdgeFreq = ExitEdgeFreq;
2351 if (!HasLoopingSucc) {
2353 ExitingBB = OldExitingBB;
2354 BestExitEdgeFreq = OldBestExitEdgeFreq;
2361 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2364 if (
L.getNumBlocks() == 1) {
2365 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2372 if (!BlocksExitingToOuterLoop.
empty() &&
2373 !BlocksExitingToOuterLoop.
count(ExitingBB))
2378 ExitFreq = BestExitEdgeFreq;
2386bool MachineBlockPlacement::hasViableTopFallthrough(
2387 const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
2389 BlockChain *PredChain = BlockToChain[Pred];
2390 if (!LoopBlockSet.count(Pred) &&
2391 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2396 for (MachineBasicBlock *Succ : Pred->
successors()) {
2398 BlockChain *SuccChain = BlockToChain[Succ];
2401 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2419void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2420 const MachineBasicBlock *ExitingBB,
2421 BlockFrequency ExitFreq,
2422 const BlockFilterSet &LoopBlockSet) {
2426 MachineBasicBlock *Top = *LoopChain.begin();
2427 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2430 if (Bottom == ExitingBB)
2437 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2442 if (ViableTopFallthrough) {
2443 for (MachineBasicBlock *Succ : Bottom->
successors()) {
2444 BlockChain *SuccChain = BlockToChain[Succ];
2445 if (!LoopBlockSet.count(Succ) &&
2446 (!SuccChain || Succ == *SuccChain->begin()))
2452 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2453 if (FallThrough2Top >= ExitFreq)
2457 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2458 if (ExitIt == LoopChain.end())
2480 if (ViableTopFallthrough) {
2481 assert(std::next(ExitIt) != LoopChain.end() &&
2482 "Exit should not be last BB");
2483 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2491 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2507void MachineBlockPlacement::rotateLoopWithProfile(
2508 BlockChain &LoopChain,
const MachineLoop &L,
2509 const BlockFilterSet &LoopBlockSet) {
2510 auto RotationPos = LoopChain.end();
2511 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2521 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2522 unsigned Scale) -> BlockFrequency {
2524 return BlockFrequency(0);
2527 return Freq / BranchProbability(1, Scale);
2533 BlockFrequency HeaderFallThroughCost(0);
2535 BlockChain *PredChain = BlockToChain[Pred];
2536 if (!LoopBlockSet.count(Pred) &&
2537 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2538 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2540 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2544 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2545 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2554 for (
auto *BB : LoopChain) {
2557 BlockChain *SuccChain = BlockToChain[Succ];
2558 if (!LoopBlockSet.count(Succ) &&
2559 (!SuccChain || Succ == *SuccChain->begin())) {
2561 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2565 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2573 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2574 EndIter = LoopChain.end();
2575 Iter != EndIter; Iter++, TailIter++) {
2578 if (TailIter == LoopChain.end())
2579 TailIter = LoopChain.begin();
2581 auto TailBB = *TailIter;
2584 BlockFrequency
Cost = BlockFrequency(0);
2589 if (Iter != LoopChain.begin())
2590 Cost += HeaderFallThroughCost;
2594 for (
auto &ExitWithFreq : ExitsWithFreq)
2595 if (TailBB != ExitWithFreq.first)
2596 Cost += ExitWithFreq.second;
2612 if (TailBB->isSuccessor(*Iter)) {
2613 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2614 if (TailBB->succ_size() == 1)
2616 else if (TailBB->succ_size() == 2) {
2618 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2619 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2620 ? TailBBFreq * TailToHeadProb.
getCompl()
2631 if (
Cost < SmallestRotationCost) {
2632 SmallestRotationCost =
Cost;
2637 if (RotationPos != LoopChain.end()) {
2639 <<
" to the top\n");
2640 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2648MachineBlockPlacement::BlockFilterSet
2649MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2653 bool operator()(
const MachineBasicBlock *
X,
2654 const MachineBasicBlock *
Y)
const {
2655 return X->getNumber() <
Y->getNumber();
2658 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2670 BlockFrequency LoopFreq(0);
2671 for (
auto *LoopPred :
L.getHeader()->predecessors())
2672 if (!
L.contains(LoopPred))
2673 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2676 for (MachineBasicBlock *LoopBB :
L.getBlocks()) {
2677 if (LoopBlockSet.count(LoopBB))
2682 BlockChain *Chain = BlockToChain[LoopBB];
2683 for (MachineBasicBlock *ChainBB : *Chain)
2684 LoopBlockSet.insert(ChainBB);
2687 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2692 BlockFilterSet
Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2702void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2705 for (
const MachineLoop *InnerLoop : L)
2706 buildLoopChains(*InnerLoop);
2709 "BlockWorkList not empty when starting to build loop chains.");
2711 "EHPadWorkList not empty when starting to build loop chains.");
2712 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2717 bool RotateLoopWithProfile =
2725 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2733 PreferredLoopExit =
nullptr;
2734 BlockFrequency ExitFreq;
2735 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2736 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2738 BlockChain &LoopChain = *BlockToChain[LoopTop];
2743 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2744 assert(LoopChain.UnscheduledPredecessors == 0 &&
2745 "LoopChain should not have unscheduled predecessors.");
2746 UpdatedPreds.
insert(&LoopChain);
2748 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2749 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2751 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2753 if (RotateLoopWithProfile)
2754 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2756 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2760 bool BadLoop =
false;
2761 if (LoopChain.UnscheduledPredecessors) {
2763 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2764 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2765 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
2767 for (MachineBasicBlock *ChainBB : LoopChain) {
2769 if (!LoopBlockSet.remove(ChainBB)) {
2773 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2774 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2775 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2780 if (!LoopBlockSet.empty()) {
2782 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2783 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2784 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2785 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2788 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2791 BlockWorkList.
clear();
2792 EHPadWorkList.
clear();
2795void MachineBlockPlacement::buildCFGChains() {
2801 MachineBasicBlock *BB = &*FI;
2803 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2808 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2813 MachineBasicBlock *NextBB = &*NextFI;
2816 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2817 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2820 Chain->merge(NextBB,
nullptr);
2822 BlocksWithUnanalyzableExits.
insert(&*BB);
2830 PreferredLoopExit =
nullptr;
2831 for (MachineLoop *L : *MLI)
2832 buildLoopChains(*L);
2835 "BlockWorkList should be empty before building final chain.");
2837 "EHPadWorkList should be empty before building final chain.");
2839 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2840 for (MachineBasicBlock &
MBB : *
F)
2841 fillWorkLists(&
MBB, UpdatedPreds);
2843 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2844 buildChain(&
F->front(), FunctionChain);
2847 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2851 bool BadFunc =
false;
2852 FunctionBlockSetType FunctionBlockSet;
2853 for (MachineBasicBlock &
MBB : *
F)
2854 FunctionBlockSet.insert(&
MBB);
2856 for (MachineBasicBlock *ChainBB : FunctionChain)
2857 if (!FunctionBlockSet.erase(ChainBB)) {
2859 dbgs() <<
"Function chain contains a block not in the function!\n"
2863 if (!FunctionBlockSet.empty()) {
2865 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2866 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2867 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2869 assert(!BadFunc &&
"Detected problems with the block placement.");
2874 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2875 F->getNumBlockIDs());
2877 MachineBasicBlock *LastMBB =
nullptr;
2878 for (
auto &
MBB : *
F) {
2879 if (LastMBB !=
nullptr)
2883 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2889 for (MachineBasicBlock *ChainBB : FunctionChain) {
2890 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2894 F->splice(InsertPos, ChainBB);
2899 if (ChainBB == *FunctionChain.begin())
2907 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2910 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2916 "Unexpected block with un-analyzable fallthrough!");
2918 TBB = FBB =
nullptr;
2950 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2952 MachineBasicBlock *PrevBB = &
F->
back();
2956 BlockWorkList.
clear();
2957 EHPadWorkList.
clear();
2960void MachineBlockPlacement::optimizeBranches() {
2961 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2970 for (MachineBasicBlock *ChainBB : FunctionChain) {
2972 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2993 auto Dl = ChainBB->findBranchDebugLoc();
2999void MachineBlockPlacement::alignBlocks() {
3006 if (
F->getFunction().hasMinSize() ||
3011 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3013 if (FunctionChain.begin() == FunctionChain.end())
3016 const BranchProbability ColdProb(1, 5);
3017 BlockFrequency EntryFreq = MBFI->getBlockFreq(&
F->front());
3018 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3019 for (MachineBasicBlock *ChainBB : FunctionChain) {
3020 if (ChainBB == *FunctionChain.begin())
3027 MachineLoop *
L = MLI->getLoopFor(ChainBB);
3032 unsigned MDAlign = 1;
3033 MDNode *LoopID =
L->getLoopID();
3042 if (S->
getString() ==
"llvm.loop.align") {
3044 "per-loop align metadata should have two operands.");
3047 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3053 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3059 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3060 if (Freq < WeightedEntryFreq)
3065 MachineBasicBlock *LoopHeader =
L->getHeader();
3066 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3067 if (Freq < (LoopHeaderFreq * ColdProb))
3077 MachineBasicBlock *LayoutPred =
3080 auto DetermineMaxAlignmentPadding = [&]() {
3087 ChainBB->setMaxBytesForAlignment(MaxBytes);
3093 ChainBB->setAlignment(LoopAlign);
3094 DetermineMaxAlignmentPadding();
3102 BranchProbability LayoutProb =
3104 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3105 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3106 ChainBB->setAlignment(LoopAlign);
3107 DetermineMaxAlignmentPadding();
3111 const bool HasMaxBytesOverride =
3116 for (MachineBasicBlock &
MBB : *
F) {
3117 if (HasMaxBytesOverride)
3126 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3127 auto LayoutPred = std::prev(MBI);
3129 if (HasMaxBytesOverride)
3154bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3155 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3156 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3158 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3159 bool Removed, DuplicatedToLPred;
3160 bool DuplicatedToOriginalLPred;
3161 Removed = maybeTailDuplicateBlock(
3162 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3163 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3166 DuplicatedToOriginalLPred = DuplicatedToLPred;
3171 while (DuplicatedToLPred && Removed) {
3172 MachineBasicBlock *DupBB, *DupPred;
3178 BlockChain::iterator ChainEnd = Chain.end();
3179 DupBB = *(--ChainEnd);
3181 if (ChainEnd == Chain.begin())
3183 DupPred = *std::prev(ChainEnd);
3184 Removed = maybeTailDuplicateBlock(
3185 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3186 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3193 LPred = *std::prev(Chain.end());
3194 if (DuplicatedToOriginalLPred)
3195 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3212bool MachineBlockPlacement::maybeTailDuplicateBlock(
3213 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3215 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3216 bool &DuplicatedToLPred) {
3217 DuplicatedToLPred =
false;
3218 if (!shouldTailDuplicate(BB))
3226 bool Removed =
false;
3227 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3232 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3233 It->second->remove(RemBB);
3234 BlockToChain.
erase(It);
3238 if (&(*PrevUnplacedBlockIt) == RemBB) {
3239 PrevUnplacedBlockIt++;
3243 if (RemBB->isEHPad()) {
3254 if (It != BlockFilter->end()) {
3255 if (It < PrevUnplacedBlockInFilterIt) {
3256 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3259 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3260 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3261 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3263 }
else if (It == PrevUnplacedBlockInFilterIt)
3266 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3268 BlockFilter->erase(It);
3273 MLI->removeBlock(RemBB);
3274 if (RemBB == PreferredLoopExit)
3275 PreferredLoopExit =
nullptr;
3280 auto RemovalCallbackRef =
3281 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3283 SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
3285 SmallVector<MachineBasicBlock *, 8> CandidatePreds;
3286 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr =
nullptr;
3287 if (
F->getFunction().hasProfileData()) {
3289 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3290 if (CandidatePreds.
size() == 0)
3293 CandidatePtr = &CandidatePreds;
3296 &RemovalCallbackRef, CandidatePtr);
3299 DuplicatedToLPred =
false;
3300 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3302 BlockChain *PredChain = BlockToChain[Pred];
3304 DuplicatedToLPred =
true;
3305 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3306 PredChain == &Chain)
3308 for (MachineBasicBlock *NewSucc : Pred->
successors()) {
3309 if (BlockFilter && !BlockFilter->count(NewSucc))
3311 BlockChain *NewChain = BlockToChain[NewSucc];
3312 if (NewChain != &Chain && NewChain != PredChain)
3313 NewChain->UnscheduledPredecessors++;
3323 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3332BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3337bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3338 MachineBasicBlock *Pred,
3339 BlockFilterSet *BlockFilter) {
3342 if (BlockFilter && !BlockFilter->count(Pred))
3344 BlockChain *PredChain = BlockToChain[Pred];
3345 if (PredChain && (Pred != *std::prev(PredChain->end())))
3350 for (MachineBasicBlock *Succ : Pred->
successors())
3352 if (BlockFilter && !BlockFilter->count(Succ))
3354 BlockChain *SuccChain = BlockToChain[Succ];
3355 if (SuccChain && (Succ != *SuccChain->begin()))
3358 if (SuccProb > BestProb)
3359 BestProb = SuccProb;
3363 if (BBProb <= BestProb)
3368 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3369 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3370 return Gain > scaleThreshold(BB);
3375void MachineBlockPlacement::findDuplicateCandidates(
3376 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3377 BlockFilterSet *BlockFilter) {
3378 MachineBasicBlock *Fallthrough =
nullptr;
3380 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3381 SmallVector<MachineBasicBlock *, 8> Preds(BB->
predecessors());
3382 SmallVector<MachineBasicBlock *, 8> Succs(BB->
successors());
3385 auto CmpSucc = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3388 auto CmpPred = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3389 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3394 auto SuccIt = Succs.begin();
3395 if (SuccIt != Succs.end()) {
3440 for (MachineBasicBlock *Pred : Preds) {
3441 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3446 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3448 if (SuccIt != Succs.end())
3454 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3455 BlockFrequency DupCost;
3456 if (SuccIt == Succs.end()) {
3458 if (Succs.size() > 0)
3459 DupCost += PredFreq;
3462 DupCost += PredFreq;
3466 assert(OrigCost >= DupCost);
3467 OrigCost -= DupCost;
3468 if (OrigCost > BBDupThreshold) {
3470 if (SuccIt != Succs.end())
3478 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3479 Candidates[0] = Candidates.
back();
3485void MachineBlockPlacement::initTailDupThreshold() {
3486 DupThreshold = BlockFrequency(0);
3487 if (
F->getFunction().hasProfileData()) {
3491 UseProfileCount =
true;
3496 BlockFrequency MaxFreq = BlockFrequency(0);
3497 for (MachineBasicBlock &
MBB : *
F) {
3498 BlockFrequency Freq = MBFI->getBlockFreq(&
MBB);
3504 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3505 UseProfileCount =
false;
3517 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3529 (OptLevel < CodeGenOptLevel::Aggressive ||
3531 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3538 auto MBFI = std::make_unique<MBFIWrapper>(
3541 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3545 .getCachedResult<ProfileSummaryAnalysis>(
3551 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3563 OS << MapClassName2PassName(
name());
3564 if (!AllowTailMerge)
3565 OS <<
"<no-tail-merge>";
3571 if (std::next(MF.
begin()) == MF.
end())
3575 OptLevel =
F->getTarget().getOptLevel();
3582 PreferredLoopExit =
nullptr;
3585 "BlockToChain map should be empty before starting placement.");
3587 "Computed Edge map should be empty before starting placement.");
3590 initTailDupThreshold();
3592 const bool OptForSize =
3597 bool UseExtTspForPerf =
false;
3598 bool UseExtTspForSize =
false;
3607 if (allowTailDupPlacement(*
F)) {
3610 const bool PreRegAlloc =
false;
3611 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3613 if (!UseExtTspForSize)
3614 precomputeTriangleChains();
3618 if (!UseExtTspForSize)
3628 if (EnableTailMerge) {
3630 BranchFolder BF(
true,
false,
3638 if (!UseExtTspForSize) {
3640 BlockToChain.
clear();
3641 ComputedEdges.
clear();
3651 if (UseExtTspForPerf || UseExtTspForSize) {
3653 !(UseExtTspForPerf && UseExtTspForSize) &&
3654 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3655 applyExtTsp(UseExtTspForSize);
3656 createCFGChainExtTsp();
3662 BlockToChain.
clear();
3663 ComputedEdges.
clear();
3672 MBFI->view(
"MBP." + MF.
getName(),
false);
3680void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3682 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3684 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3685 CurrentBlockOrder.reserve(
F->size());
3686 size_t NumBlocks = 0;
3687 for (
const MachineBasicBlock &
MBB : *
F) {
3688 BlockIndex[&
MBB] = NumBlocks++;
3689 CurrentBlockOrder.push_back(&
MBB);
3692 SmallVector<uint64_t, 0> BlockCounts(
F->size());
3693 SmallVector<uint64_t, 0> BlockSizes(
F->size());
3697 for (MachineBasicBlock &
MBB : *
F) {
3699 BlockFrequency BlockFreq = MBFI->getBlockFreq(&
MBB);
3700 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3709 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3710 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3715 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3726 if (FBB && FBB != FTB)
3733 const uint64_t Freq = Succs.
size() == 1 ? 110 : 100;
3734 for (
const MachineBasicBlock *Succ : Succs)
3735 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3739 BlockFrequency JumpFreq = BlockFreq * EP;
3746 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3747 <<
" with profile = " <<
F->getFunction().hasProfileData()
3748 <<
" (" <<
F->getName() <<
")" <<
"\n");
3755 std::vector<const MachineBasicBlock *> NewBlockOrder;
3756 NewBlockOrder.reserve(
F->size());
3757 for (uint64_t Node : NewOrder) {
3758 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3760 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3764 if (OptForSize && OrgScore > OptScore)
3765 assignBlockOrder(CurrentBlockOrder);
3767 assignBlockOrder(NewBlockOrder);
3770void MachineBlockPlacement::assignBlockOrder(
3771 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3772 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3773 F->RenumberBlocks();
3780 bool HasChanges =
false;
3781 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3782 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3791 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(
F->getNumBlockIDs());
3792 for (
auto &
MBB : *
F) {
3797 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3798 for (
const MachineBasicBlock *
MBB : NewBlockOrder) {
3799 NewIndex[
MBB] = NewIndex.
size();
3801 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3802 return NewIndex[&
L] < NewIndex[&
R];
3807 const TargetInstrInfo *
TII =
F->getSubtarget().getInstrInfo();
3809 for (
auto &
MBB : *
F) {
3816 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3822 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3829void MachineBlockPlacement::createCFGChainExtTsp() {
3830 BlockToChain.
clear();
3831 ComputedEdges.
clear();
3834 MachineBasicBlock *HeadBB = &
F->
front();
3835 BlockChain *FunctionChain =
3836 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3838 for (MachineBasicBlock &
MBB : *
F) {
3841 FunctionChain->merge(&
MBB,
nullptr);
3853class MachineBlockPlacementStats {
3855 const MachineBranchProbabilityInfo *MBPI;
3858 const MachineBlockFrequencyInfo *MBFI;
3861 MachineBlockPlacementStats(
const MachineBranchProbabilityInfo *MBPI,
3862 const MachineBlockFrequencyInfo *MBFI)
3863 : MBPI(MBPI), MBFI(MBFI) {}
3864 bool run(MachineFunction &MF);
3867class MachineBlockPlacementStatsLegacy :
public MachineFunctionPass {
3871 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(
ID) {
3876 bool runOnMachineFunction(MachineFunction &
F)
override {
3878 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3879 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3880 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3883 void getAnalysisUsage(AnalysisUsage &AU)
const override {
3884 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
3885 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
3893char MachineBlockPlacementStatsLegacy::ID = 0;
3898 "Basic Block Placement Stats",
false,
false)
3910 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3916 if (std::next(
F.begin()) ==
F.end())
3925 (
MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3927 (
MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3930 if (
MBB.isLayoutSuccessor(Succ))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
cl::opt< unsigned > ProfileLikelyProb
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
cl::opt< unsigned > StaticLikelyProb
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI void initializeMachineBlockPlacementLegacyPass(PassRegistry &)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.