LLVM 22.0.0git
PartialInlining.cpp
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
12//===----------------------------------------------------------------------===//
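//
// As a rough illustration (names here are purely hypothetical), a callee of
// the form
//
//   void callee() {
//     if (cheap_test)              // guard, usually taken
//       return;
//     /* large, rarely executed body */
//   }
//
// is transformed so that the guard can be inlined into each caller, while the
// large body is extracted into a separate "outlined" function that is only
// called on the path where the guard does not return early.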
13
14#include "llvm/Transforms/IPO/PartialInlining.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Operator.h"
44#include "llvm/IR/User.h"
50#include "llvm/Transforms/IPO.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <memory>
58#include <tuple>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "partial-inlining"
64
65STATISTIC(NumPartialInlined,
66 "Number of callsites that functions were partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into their caller(s).");
70STATISTIC(NumColdRegionsFound,
71 "Number of cold single entry/exit regions found.");
72STATISTIC(NumColdRegionsOutlined,
73 "Number of cold single entry/exit regions outlined.");
74
75// Command line option to disable partial-inlining. The default is false:
76static cl::opt<bool>
77 DisablePartialInlining("disable-partial-inlining", cl::init(false),
78 cl::Hidden, cl::desc("Disable partial inlining"));
79// Command line option to disable multi-region partial-inlining. The default is
80// false:
81static cl::opt<bool> DisableMultiRegionPartialInline(
82 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83 cl::desc("Disable multi-region partial inlining"));
84
85// Command line option to force outlining in regions with live exit variables.
86// The default is false:
87static cl::opt<bool>
88 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89 cl::desc("Force outline regions with live exits"));
90
91// Command line option to enable marking outline functions with Cold Calling
92// Convention. The default is false:
93static cl::opt<bool>
94 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95 cl::desc("Mark outline function calls with ColdCC"));
96
97// This is an option used by testing:
98static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99
100 cl::ReallyHidden,
101 cl::desc("Skip Cost Analysis"));
102// Used to determine if a cold region is worth outlining based on
103// its inlining cost compared to the original function. Default is set at 10%.
104// i.e. if the cold region reduces the inlining cost of the original function by
105// at least 10%.
106static cl::opt<float> MinRegionSizeRatio(
107 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108 cl::desc("Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
110// Used to tune the minimum number of execution counts needed in the predecessor
111// block to the cold edge, i.e. a confidence interval.
112static cl::opt<unsigned>
113 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114 cl::desc("Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
116// Used to determine when an edge is considered cold. Default is set to 10%,
117// i.e. if the branch probability is 10% or less, then it is deemed 'cold'.
118static cl::opt<float> ColdBranchRatio(
119 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120 cl::desc("Minimum BranchProbability to consider a region cold."));
121
122static cl::opt<unsigned> MaxNumInlineBlocks(
123 "max-num-inline-blocks", cl::init(5), cl::Hidden,
124 cl::desc("Max number of blocks to be partially inlined"));
125
126// Command line option to set the maximum number of partial inlining allowed
127// for the module. The default value of -1 means no limit.
128static cl::opt<int> MaxNumPartialInlining(
129 "max-partial-inlining", cl::init(-1), cl::Hidden,
130 cl::desc("Max number of partial inlining. The default is unlimited"));
131
132// Used only when PGO or user annotated branch data is absent. It is
133// the minimum relative frequency assigned to the outline region. If BFI
134// produces a larger value, the BFI value will be used.
135static cl::opt<int>
136 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
137 cl::Hidden,
138 cl::desc("Relative frequency of outline region to "
139 "the entry block"));
140
141static cl::opt<unsigned> ExtraOutliningPenalty(
142 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143 cl::desc("A debug option to add additional penalty to the computed one."));
144
145namespace {
146
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() = default;
149
150 // Returns the number of blocks to be inlined including all blocks
151 // in Entries and one return block.
152 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153
154 // A set of blocks including the function entry that guard
155 // the region to be outlined.
156 SmallVector<BasicBlock *, 4> Entries;
157
158 // The return block that is not included in the outlined region.
159 BasicBlock *ReturnBlock = nullptr;
160
161 // The dominating block of the region to be outlined.
162 BasicBlock *NonReturnBlock = nullptr;
163
164 // The set of blocks in Entries that are predecessors to ReturnBlock
165 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166};
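// As a rough sketch, the single-region case looks for a CFG of the shape
//
//   Entry (plus possibly a few more guard blocks in Entries)
//      |                 \
//      v                  v
//   NonReturnBlock     ReturnBlock
//
// where the guard blocks and ReturnBlock stay inlined into callers, and the
// region dominated by NonReturnBlock is the outlining candidate.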
167
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() = default;
170
171 // Container for outline regions
172 struct OutlineRegionInfo {
173 OutlineRegionInfo(ArrayRef<BasicBlock *> Region, BasicBlock *EntryBlock,
174 BasicBlock *ExitBlock, BasicBlock *ReturnBlock)
175 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
177 SmallVector<BasicBlock *, 8> Region;
178 BasicBlock *EntryBlock;
179 BasicBlock *ExitBlock;
180 BasicBlock *ReturnBlock;
181 };
182
183 SmallVector<OutlineRegionInfo, 4> ORI;
184};
185
186struct PartialInlinerImpl {
187
188 PartialInlinerImpl(
189 function_ref<AssumptionCache &(Function &)> GetAC,
190 function_ref<AssumptionCache *(Function &)> LookupAC,
191 function_ref<TargetTransformInfo &(Function &)> GTTI,
192 function_ref<const TargetLibraryInfo &(Function &)> GTLI,
193 ProfileSummaryInfo &ProfSI,
194 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
195 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
196 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
197
198 bool run(Module &M);
199 // Main part of the transformation that calls helper functions to find
200 // outlining candidates, clone & outline the function, and attempt to
201 // partially inline the resulting function. Returns true if
202 // inlining was successful, false otherwise. Also returns the outline
203 // function (only if we partially inlined early returns) as there is a
204 // possibility to further "peel" early return statements that were left in the
205 // outline function due to code size.
206 std::pair<bool, Function *> unswitchFunction(Function &F);
207
208 // This class speculatively clones the function to be partial inlined.
209 // At the end of partial inlining, the remaining callsites to the cloned
210 // function that are not partially inlined will be fixed up to reference
211 // the original function, and the cloned function will be erased.
212 struct FunctionCloner {
213 // Two constructors, one for single region outlining, the other for
214 // multi-region outlining.
215 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
216 OptimizationRemarkEmitter &ORE,
217 function_ref<AssumptionCache *(Function &)> LookupAC,
218 function_ref<TargetTransformInfo &(Function &)> GetTTI);
219 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
220 OptimizationRemarkEmitter &ORE,
221 function_ref<AssumptionCache *(Function &)> LookupAC,
222 function_ref<TargetTransformInfo &(Function &)> GetTTI);
223
224 ~FunctionCloner();
225
226 // Prepare for function outlining: making sure there is only
227 // one incoming edge from the extracted/outlined region to
228 // the return block.
229 void normalizeReturnBlock() const;
230
231 // Do function outlining for cold regions.
232 bool doMultiRegionFunctionOutlining();
233 // Do function outlining for region after early return block(s).
234 // NOTE: For vararg functions that do the vararg handling in the outlined
235 // function, we temporarily generate IR that does not properly
236 // forward varargs to the outlined function. Calling InlineFunction
237 // will update calls to the outlined functions to properly forward
238 // the varargs.
239 Function *doSingleRegionFunctionOutlining();
240
241 Function *OrigFunc = nullptr;
242 Function *ClonedFunc = nullptr;
243
244 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
245 // Keep track of Outlined Functions and the basic block they're called from.
246 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
247
248 // ClonedFunc is inlined in one of its callers after function
249 // outlining.
250 bool IsFunctionInlined = false;
251 // The cost of the region to be outlined.
252 InstructionCost OutlinedRegionCost = 0;
253 // ClonedOI is specific to outlining non-early return blocks.
254 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
255 // ClonedOMRI is specific to outlining cold regions.
256 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
257 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
258 OptimizationRemarkEmitter &ORE;
259 function_ref<AssumptionCache *(Function &)> LookupAC;
260 function_ref<TargetTransformInfo &(Function &)> GetTTI;
261 };
262
263private:
264 int NumPartialInlining = 0;
265 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
266 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
267 function_ref<TargetTransformInfo &(Function &)> GetTTI;
268 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
269 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
270 ProfileSummaryInfo &PSI;
271
272 // Return the frequency of the outlining call BB relative to F's entry point.
273 // The result is no larger than 1 and is represented using BP.
274 // (Note that the outlined region's 'head' block can only have incoming
275 // edges from the guarding entry blocks).
276 BranchProbability
277 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
278
279 // Return true if the callee of CB should be partially inlined, i.e. if
280 // partially inlining it is expected to be profitable.
281 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
282 BlockFrequency WeightedOutliningRcost,
283 OptimizationRemarkEmitter &ORE) const;
284
285 // Try to inline DuplicateFunction (cloned from F with a call to
286 // the OutlinedFunction) into its callers. Return true
287 // if there is any successful inlining.
288 bool tryPartialInline(FunctionCloner &Cloner);
289
290 // Compute the mapping from each use site of DuplicateFunction to the enclosing
291 // BB's profile count.
292 void
293 computeCallsiteToProfCountMap(Function *DuplicateFunction,
294 DenseMap<User *, uint64_t> &SiteCountMap) const;
295
296 bool isLimitReached() const {
297 return (MaxNumPartialInlining != -1 &&
298 NumPartialInlining >= MaxNumPartialInlining);
299 }
300
301 static CallBase *getSupportedCallBase(User *U) {
302 if (isa<CallInst>(U) || isa<InvokeInst>(U))
303 return cast<CallBase>(U);
304 llvm_unreachable("All uses must be calls");
305 return nullptr;
306 }
307
308 static CallBase *getOneCallSiteTo(Function &F) {
309 User *User = *F.user_begin();
310 return getSupportedCallBase(User);
311 }
312
313 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
314 CallBase *CB = getOneCallSiteTo(F);
315 DebugLoc DLoc = CB->getDebugLoc();
316 BasicBlock *Block = CB->getParent();
317 return std::make_tuple(DLoc, Block);
318 }
319
320 // Returns the costs associated with function outlining:
321 // - The first value is the non-weighted runtime cost for making the call
322 // to the outlined function, including the additional setup cost in the
323 // outlined function itself;
324 // - The second value is the estimated size of the new call sequence in
325 // basic block Cloner.OutliningCallBB;
326 std::tuple<InstructionCost, InstructionCost>
327 computeOutliningCosts(FunctionCloner &Cloner) const;
328
329 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
330 // approximate both the size and runtime cost (Note that in the current
331 // inline cost analysis, there is no clear distinction there either).
332 static InstructionCost computeBBInlineCost(BasicBlock *BB,
333 TargetTransformInfo *TTI);
334
335 std::unique_ptr<FunctionOutliningInfo>
336 computeOutliningInfo(Function &F) const;
337
338 std::unique_ptr<FunctionOutliningMultiRegionInfo>
339 computeOutliningColdRegionsInfo(Function &F,
340 OptimizationRemarkEmitter &ORE) const;
341};
342
343} // end anonymous namespace
344
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
347 Function &F, OptimizationRemarkEmitter &ORE) const {
348 BasicBlock *EntryBlock = &F.front();
349
350 DominatorTree DT(F);
351 LoopInfo LI(DT);
352 BranchProbabilityInfo BPI(F, LI);
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
354 BlockFrequencyInfo *BFI;
355 if (!GetBFI) {
356 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
357 BFI = ScopedBFI.get();
358 } else
359 BFI = &(GetBFI(F));
360
361 // Return if we don't have profiling information.
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
364
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
367
368 auto IsSingleExit =
369 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
370 BasicBlock *ExitBlock = nullptr;
371 for (auto *Block : BlockList) {
372 for (BasicBlock *Succ : successors(Block)) {
373 if (!is_contained(BlockList, Succ)) {
374 if (ExitBlock) {
375 ORE.emit([&]() {
376 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
377 &Succ->front())
378 << "Region dominated by "
379 << ore::NV("Block", BlockList.front()->getName())
380 << " has more than one region exit edge.";
381 });
382 return nullptr;
383 }
384
385 ExitBlock = Block;
386 }
387 }
388 }
389 return ExitBlock;
390 };
391
392 auto BBProfileCount = [BFI](BasicBlock *BB) {
393 return BFI->getBlockProfileCount(BB).value_or(0);
394 };
395
396 // Use the same computeBBInlineCost function to compute the cost savings of
397 // outlining the candidate region.
398 TargetTransformInfo *FTTI = &GetTTI(F);
399 InstructionCost OverallFunctionCost = 0;
400 for (auto &BB : F)
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
402
403 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
404 << "\n";);
405
406 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
407 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
408
409 BranchProbability MinBranchProbability(
410 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
411 MinBlockCounterExecution);
412 bool ColdCandidateFound = false;
413 BasicBlock *CurrEntry = EntryBlock;
414 std::vector<BasicBlock *> DFS;
415 SmallPtrSet<BasicBlock *, 8> VisitedSet;
416 DFS.push_back(CurrEntry);
417 VisitedSet.insert(CurrEntry);
418
419 // Use Depth First Search on the basic blocks to find CFG edges that are
420 // considered cold.
421// Each cold region considered must also have its inline cost compared to the
422// overall inline cost of the original function. The region is outlined only
423// if it reduces the inline cost of the function by 'MinOutlineRegionCost' or
424// more.
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
427 DFS.pop_back();
428 // Only consider regions with predecessor blocks that are considered
429 // not-cold (default: part of the top 99.99% of all block counters)
430 // AND greater than our minimum block execution count (default: 100).
431 if (PSI.isColdBlock(ThisBB, BFI) ||
432 BBProfileCount(ThisBB) < MinBlockCounterExecution)
433 continue;
434 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
435 if (!VisitedSet.insert(*SI).second)
436 continue;
437 DFS.push_back(*SI);
438 // If branch isn't cold, we skip to the next one.
439 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
440 if (SuccProb > MinBranchProbability)
441 continue;
442
443 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
444 << SI->getName()
445 << "\nBranch Probability = " << SuccProb << "\n";);
446
447 SmallVector<BasicBlock *, 8> DominateVector;
448 DT.getDescendants(*SI, DominateVector);
449 assert(!DominateVector.empty() &&
450 "SI should be reachable and have at least itself as descendant");
451
452 // We can only outline single entry regions (for now).
453 if (!DominateVector.front()->hasNPredecessors(1)) {
454 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
455 << " doesn't have a single predecessor in the "
456 "dominator tree\n";);
457 continue;
458 }
459
460 BasicBlock *ExitBlock = nullptr;
461 // We can only outline single exit regions (for now).
462 if (!(ExitBlock = IsSingleExit(DominateVector))) {
463 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
464 << " doesn't have a unique successor\n";);
465 continue;
466 }
467
468 InstructionCost OutlineRegionCost = 0;
469 for (auto *BB : DominateVector)
470 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
471
472 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
473 << "\n";);
474
475 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
476 ORE.emit([&]() {
477 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
478 &SI->front())
479 << ore::NV("Callee", &F)
480 << " inline cost-savings smaller than "
481 << ore::NV("Cost", MinOutlineRegionCost);
482 });
483
484 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
485 << MinOutlineRegionCost << "\n";);
486 continue;
487 }
488
489 // For now, ignore blocks that belong to a SISE (single-entry, single-exit) region that is a
490 // candidate for outlining. In the future, we may want to look
491 // at inner regions because the outer region may have live-exit
492 // variables.
493 VisitedSet.insert_range(DominateVector);
494
495 // ReturnBlock here means the block after the outline call
496 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
497 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
498 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
499 OutliningInfo->ORI.push_back(RegInfo);
500 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
501 << DominateVector.front()->getName() << "\n";);
502 ColdCandidateFound = true;
503 NumColdRegionsFound++;
504 }
505 }
506
507 if (ColdCandidateFound)
508 return OutliningInfo;
509
510 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
511}
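// Summarizing the defaults used above (all tunable via the cl::opts at the top
// of this file): an edge is treated as cold when its branch probability is at
// most 10% (cold-branch-ratio), its source block executes at least 100 times
// (min-block-execution) and is not itself cold according to the profile
// summary, and the dominated single-entry/single-exit region must cost at
// least 10% of the whole function's inline cost (min-region-size-ratio).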
512
513std::unique_ptr<FunctionOutliningInfo>
514PartialInlinerImpl::computeOutliningInfo(Function &F) const {
515 BasicBlock *EntryBlock = &F.front();
516 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
517 if (!BR || BR->isUnconditional())
518 return std::unique_ptr<FunctionOutliningInfo>();
519
520 // Returns true if Succ is BB's successor
521 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
522 return is_contained(successors(BB), Succ);
523 };
524
525 auto IsReturnBlock = [](BasicBlock *BB) {
526 Instruction *TI = BB->getTerminator();
527 return isa<ReturnInst>(TI);
528 };
529
530 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
531 if (IsReturnBlock(Succ1))
532 return std::make_tuple(Succ1, Succ2);
533 if (IsReturnBlock(Succ2))
534 return std::make_tuple(Succ2, Succ1);
535
536 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
537 };
538
539 // Detect a triangular shape:
540 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
541 if (IsSuccessor(Succ1, Succ2))
542 return std::make_tuple(Succ1, Succ2);
543 if (IsSuccessor(Succ2, Succ1))
544 return std::make_tuple(Succ2, Succ1);
545
546 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
547 };
548
549 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
550 std::make_unique<FunctionOutliningInfo>();
551
552 BasicBlock *CurrEntry = EntryBlock;
553 bool CandidateFound = false;
554 do {
555 // The number of blocks to be inlined has already reached
556 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
557 // disables partial inlining for the function.
558 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
559 break;
560
561 if (succ_size(CurrEntry) != 2)
562 break;
563
564 BasicBlock *Succ1 = *succ_begin(CurrEntry);
565 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
566
567 BasicBlock *ReturnBlock, *NonReturnBlock;
568 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
569
570 if (ReturnBlock) {
571 OutliningInfo->Entries.push_back(CurrEntry);
572 OutliningInfo->ReturnBlock = ReturnBlock;
573 OutliningInfo->NonReturnBlock = NonReturnBlock;
574 CandidateFound = true;
575 break;
576 }
577
578 BasicBlock *CommSucc, *OtherSucc;
579 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
580
581 if (!CommSucc)
582 break;
583
584 OutliningInfo->Entries.push_back(CurrEntry);
585 CurrEntry = OtherSucc;
586 } while (true);
587
588 if (!CandidateFound)
589 return std::unique_ptr<FunctionOutliningInfo>();
590
591 // There should not be any successors (not in the entry set) other than
592 // {ReturnBlock, NonReturnBlock}
593 assert(OutliningInfo->Entries[0] == &F.front() &&
594 "Function Entry must be the first in Entries vector");
595 DenseSet<BasicBlock *> Entries(llvm::from_range, OutliningInfo->Entries);
596
597 // Returns true if BB has a predecessor that is not
598 // in the Entries set.
599 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
600 for (auto *Pred : predecessors(BB)) {
601 if (!Entries.count(Pred))
602 return true;
603 }
604 return false;
605 };
606 auto CheckAndNormalizeCandidate =
607 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
608 for (BasicBlock *E : OutliningInfo->Entries) {
609 for (auto *Succ : successors(E)) {
610 if (Entries.count(Succ))
611 continue;
612 if (Succ == OutliningInfo->ReturnBlock)
613 OutliningInfo->ReturnBlockPreds.push_back(E);
614 else if (Succ != OutliningInfo->NonReturnBlock)
615 return false;
616 }
617 // There should not be any outside incoming edges either:
618 if (HasNonEntryPred(E))
619 return false;
620 }
621 return true;
622 };
623
624 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
625 return std::unique_ptr<FunctionOutliningInfo>();
626
627 // Now grow the candidate's inlining region further by
628 // peeling off dominating blocks from the outlining region:
629 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
630 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
631 if (succ_size(Cand) != 2)
632 break;
633
634 if (HasNonEntryPred(Cand))
635 break;
636
637 BasicBlock *Succ1 = *succ_begin(Cand);
638 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
639
640 BasicBlock *ReturnBlock, *NonReturnBlock;
641 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
642 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
643 break;
644
645 if (NonReturnBlock->getSinglePredecessor() != Cand)
646 break;
647
648 // Now grow and update OutliningInfo:
649 OutliningInfo->Entries.push_back(Cand);
650 OutliningInfo->NonReturnBlock = NonReturnBlock;
651 OutliningInfo->ReturnBlockPreds.push_back(Cand);
652 Entries.insert(Cand);
653 }
654
655 return OutliningInfo;
656}
657
658// Check if there is PGO data or user annotated branch data:
659static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
660 if (F.hasProfileData())
661 return true;
662 // Now check if any of the entry blocks have MD_prof data:
663 for (auto *E : OI.Entries) {
664 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
665 if (!BR || BR->isUnconditional())
666 continue;
667 if (hasBranchWeightMD(*BR))
668 return true;
669 }
670 return false;
671}
672
673BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
674 FunctionCloner &Cloner) const {
675 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
676 auto EntryFreq =
677 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
678 auto OutliningCallFreq =
679 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
680 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
681 // we outlined any regions, so we may encounter situations where the
682 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
683 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
684 OutliningCallFreq = EntryFreq;
685
686 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
687 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
688
689 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
690 return OutlineRegionRelFreq;
691
692 // When profile data is not available, we need to be conservative in
693 // estimating the overall savings. Static branch prediction can usually
694 // guess the branch direction right (taken/non-taken), but the guessed
695 // branch probability is usually not biased enough. In case when the
696 // outlined region is predicted to be likely, its probability needs
697 // to be made higher (more biased) to not under-estimate the cost of
698 // function outlining. On the other hand, if the outlined region
699// is predicted to be less likely, the predicted probability is usually
700// higher than the actual. For instance, the actual probability of the
701// less likely target is only 5%, but the guessed probability can be
702 // 40%. In the latter case, there is no need for further adjustment.
703 // FIXME: add an option for this.
704 if (OutlineRegionRelFreq < BranchProbability(45, 100))
705 return OutlineRegionRelFreq;
706
707 OutlineRegionRelFreq = std::max(
708 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
709
710 return OutlineRegionRelFreq;
711}
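// For example, without profile data an outlined region whose statically
// estimated relative frequency is 60% is bumped up to
// max(60%, outline-region-freq-percent) = 75% with the default setting, while
// a region estimated at 40% (below the 45% cutoff) is left unchanged.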
712
713bool PartialInlinerImpl::shouldPartialInline(
714 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
715 OptimizationRemarkEmitter &ORE) const {
716 using namespace ore;
717
718 Function *Callee = CB.getCalledFunction();
719 assert(Callee == Cloner.ClonedFunc);
720
721 if (SkipCostAnalysis)
722 return isInlineViable(*Callee).isSuccess();
723
724 Function *Caller = CB.getCaller();
725 auto &CalleeTTI = GetTTI(*Callee);
726 bool RemarksEnabled =
727 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
728 DEBUG_TYPE);
729 InlineCost IC =
730 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
731 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
732
733 if (IC.isAlways()) {
734 ORE.emit([&]() {
735 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
736 << NV("Callee", Cloner.OrigFunc)
737 << " should always be fully inlined, not partially";
738 });
739 return false;
740 }
741
742 if (IC.isNever()) {
743 ORE.emit([&]() {
744 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
745 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
746 << NV("Caller", Caller)
747 << " because it should never be inlined (cost=never)";
748 });
749 return false;
750 }
751
752 if (!IC) {
753 ORE.emit([&]() {
754 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
755 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
756 << NV("Caller", Caller) << " because too costly to inline (cost="
757 << NV("Cost", IC.getCost()) << ", threshold="
758 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
759 });
760 return false;
761 }
762 const DataLayout &DL = Caller->getDataLayout();
763
764 // The savings of eliminating the call:
765 int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
766 BlockFrequency NormWeightedSavings(NonWeightedSavings);
767
768 // If the weighted savings are smaller than the weighted cost, return false.
769 if (NormWeightedSavings < WeightedOutliningRcost) {
770 ORE.emit([&]() {
771 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
772 &CB)
773 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
774 << NV("Caller", Caller) << " runtime overhead (overhead="
775 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
776 << ", savings="
777 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
778 << ")"
779 << " of making the outlined call is too high";
780 });
781
782 return false;
783 }
784
785 ORE.emit([&]() {
786 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
787 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
788 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
789 << " (threshold="
790 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
791 });
792 return true;
793}
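// In short, a callsite is partially inlined only when the ordinary inline-cost
// analysis neither forces (always-inline) nor forbids (never / too costly)
// inlining the remaining part, and the savings from eliminating the call
// (NormWeightedSavings) are at least the weighted runtime cost of the new call
// to the outlined function (WeightedOutliningRcost).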
794
795// TODO: Ideally we should share Inliner's InlineCost Analysis code.
796// For now use a simplified version. The returned 'InlineCost' will be used
797// to estimate the size cost as well as runtime cost of the BB.
798InstructionCost
799PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
800 TargetTransformInfo *TTI) {
801 InstructionCost InlineCost = 0;
802 const DataLayout &DL = BB->getDataLayout();
804 for (Instruction &I : BB->instructionsWithoutDebug()) {
805 // Skip free instructions.
806 switch (I.getOpcode()) {
807 case Instruction::BitCast:
808 case Instruction::PtrToInt:
809 case Instruction::IntToPtr:
810 case Instruction::Alloca:
811 case Instruction::PHI:
812 continue;
813 case Instruction::GetElementPtr:
814 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
815 continue;
816 break;
817 default:
818 break;
819 }
820
821 if (I.isLifetimeStartOrEnd())
822 continue;
823
824 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
825 Intrinsic::ID IID = II->getIntrinsicID();
826 SmallVector<Type *, 4> Tys;
827 FastMathFlags FMF;
828 for (Value *Val : II->args())
829 Tys.push_back(Val->getType());
830
831 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
832 FMF = FPMO->getFastMathFlags();
833
834 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
836 continue;
837 }
838
839 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
840 InlineCost += getCallsiteCost(*TTI, *CI, DL);
841 continue;
842 }
843
844 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
845 InlineCost += getCallsiteCost(*TTI, *II, DL);
846 continue;
847 }
848
849 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
850 InlineCost += (SI->getNumCases() + 1) * InstrCost;
851 continue;
852 }
853 InlineCost += InstrCost;
854 }
855
856 return InlineCost;
857}
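// For example, with the usual per-instruction inline cost of 5, a block
// containing one add, one load and a two-case switch is charged
// 5 + 5 + (2 + 1) * 5 = 25, while e.g. bitcasts, PHIs, allocas, all-zero GEPs,
// debug intrinsics and lifetime markers are treated as free.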
858
859std::tuple<InstructionCost, InstructionCost>
860PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
861 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
862 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
863 Function *OutlinedFunc = FuncBBPair.first;
864 BasicBlock* OutliningCallBB = FuncBBPair.second;
865 // Now compute the cost of the call sequence to the outlined function
866 // 'OutlinedFunction' in BB 'OutliningCallBB':
867 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
868 OutliningFuncCallCost +=
869 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
870
871 // Now compute the cost of the extracted/outlined function itself:
872 for (BasicBlock &BB : *OutlinedFunc)
873 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
874 }
875 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
876 "Outlined function cost should be no less than the outlined region");
877
878 // The code extractor introduces a new root and exit stub blocks with
879 // additional unconditional branches. Those branches will be eliminated
880 // later with bb layout. The cost should be adjusted accordingly:
881 OutlinedFunctionCost -=
882 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
883
884 InstructionCost OutliningRuntimeOverhead =
885 OutliningFuncCallCost +
886 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
887 ExtraOutliningPenalty.getValue();
888
889 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
890}
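// In other words, roughly:
//   OutliningRuntimeOverhead = cost of the call sequence(s) to the outlined
//                              function(s)
//                            + (outlined function cost - original region cost)
//                            + partial-inlining-extra-penalty
// where the middle term accounts for the argument passing and stub blocks the
// extractor adds on top of the original region.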
891
892// Create the callsite to profile count map which is
893// used to update the original function's entry count,
894// after the function is partially inlined into the callsite.
895void PartialInlinerImpl::computeCallsiteToProfCountMap(
896 Function *DuplicateFunction,
897 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
898 std::vector<User *> Users(DuplicateFunction->user_begin(),
899 DuplicateFunction->user_end());
900 Function *CurrentCaller = nullptr;
901 std::unique_ptr<BlockFrequencyInfo> TempBFI;
902 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
903
904 auto ComputeCurrBFI = [&,this](Function *Caller) {
905 // For the old pass manager:
906 if (!GetBFI) {
907 DominatorTree DT(*Caller);
908 LoopInfo LI(DT);
909 BranchProbabilityInfo BPI(*Caller, LI);
910 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
911 CurrentCallerBFI = TempBFI.get();
912 } else {
913 // New pass manager:
914 CurrentCallerBFI = &(GetBFI(*Caller));
915 }
916 };
917
918 for (User *User : Users) {
919 CallBase *CB = getSupportedCallBase(User);
920 Function *Caller = CB->getCaller();
921 if (CurrentCaller != Caller) {
922 CurrentCaller = Caller;
923 ComputeCurrBFI(Caller);
924 } else {
925 assert(CurrentCallerBFI && "CallerBFI is not set");
926 }
927 BasicBlock *CallBB = CB->getParent();
928 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
929 if (Count)
930 CallSiteToProfCountMap[User] = *Count;
931 else
932 CallSiteToProfCountMap[User] = 0;
933 }
934}
935
936PartialInlinerImpl::FunctionCloner::FunctionCloner(
937 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
938 function_ref<AssumptionCache *(Function &)> LookupAC,
939 function_ref<TargetTransformInfo &(Function &)> GetTTI)
940 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
941 ClonedOI = std::make_unique<FunctionOutliningInfo>();
942
943 // Clone the function, so that we can hack away on it.
944 ValueToValueMapTy VMap;
945 ClonedFunc = CloneFunction(F, VMap);
946
947 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
948 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
949 for (BasicBlock *BB : OI->Entries)
950 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
951
952 for (BasicBlock *E : OI->ReturnBlockPreds) {
953 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
954 ClonedOI->ReturnBlockPreds.push_back(NewE);
955 }
956 // Go ahead and update all uses to the duplicate, so that we can just
957 // use the inliner functionality when we're done hacking.
958 F->replaceAllUsesWith(ClonedFunc);
959}
960
961PartialInlinerImpl::FunctionCloner::FunctionCloner(
962 Function *F, FunctionOutliningMultiRegionInfo *OI,
963 OptimizationRemarkEmitter &ORE,
964 function_ref<AssumptionCache *(Function &)> LookupAC,
965 function_ref<TargetTransformInfo &(Function &)> GetTTI)
966 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
967 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
968
969 // Clone the function, so that we can hack away on it.
970 ValueToValueMapTy VMap;
971 ClonedFunc = CloneFunction(F, VMap);
972
973 // Go through all Outline Candidate Regions and update all BasicBlock
974 // information.
975 for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
976 OI->ORI) {
977 SmallVector<BasicBlock *, 8> Region;
978 for (BasicBlock *BB : RegionInfo.Region)
979 Region.push_back(cast<BasicBlock>(VMap[BB]));
980
981 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
982 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
983 BasicBlock *NewReturnBlock = nullptr;
984 if (RegionInfo.ReturnBlock)
985 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
986 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
987 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
988 ClonedOMRI->ORI.push_back(MappedRegionInfo);
989 }
990 // Go ahead and update all uses to the duplicate, so that we can just
991 // use the inliner functionality when we're done hacking.
992 F->replaceAllUsesWith(ClonedFunc);
993}
994
995void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
996 auto GetFirstPHI = [](BasicBlock *BB) {
997 BasicBlock::iterator I = BB->begin();
998 PHINode *FirstPhi = nullptr;
999 while (I != BB->end()) {
1000 PHINode *Phi = dyn_cast<PHINode>(I);
1001 if (!Phi)
1002 break;
1003 if (!FirstPhi) {
1004 FirstPhi = Phi;
1005 break;
1006 }
1007 }
1008 return FirstPhi;
1009 };
1010
1011 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1012 // blocks.
1013 if (!ClonedOI)
1014 return;
1015
1016 // Special hackery is needed with PHI nodes that have inputs from more than
1017 // one extracted block. For simplicity, just split the PHIs into a two-level
1018 // sequence of PHIs, some of which will go in the extracted region, and some
1019 // of which will go outside.
1020 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1021 // only split block when necessary:
1022 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1023 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1024
1025 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1026 return;
1027
1028 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1029 if (llvm::all_equal(PN->incoming_values()))
1030 return PN->getIncomingValue(0);
1031 return nullptr;
1032 };
1033
1034 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1035 ClonedOI->ReturnBlock->getFirstNonPHIIt());
1036 BasicBlock::iterator I = PreReturn->begin();
1037 BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
1038 SmallVector<Instruction *, 4> DeadPhis;
1039 while (I != PreReturn->end()) {
1040 PHINode *OldPhi = dyn_cast<PHINode>(I);
1041 if (!OldPhi)
1042 break;
1043
1044 PHINode *RetPhi =
1045 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1046 RetPhi->insertBefore(Ins);
1047 OldPhi->replaceAllUsesWith(RetPhi);
1048 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1049
1050 RetPhi->addIncoming(&*I, PreReturn);
1051 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1052 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1053 OldPhi->removeIncomingValue(E);
1054 }
1055
1056 // After incoming values splitting, the old phi may become trivial.
1057 // Keeping the trivial phi can introduce a definition inside the outline
1058 // region which is live-out, causing extra overhead (load, store,
1059 // arg passing etc).
1060 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1061 OldPhi->replaceAllUsesWith(OldPhiVal);
1062 DeadPhis.push_back(OldPhi);
1063 }
1064 ++I;
1065 }
1066 for (auto *DP : DeadPhis)
1067 DP->eraseFromParent();
1068
1069 for (auto *E : ClonedOI->ReturnBlockPreds)
1070 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1071}
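// As a small, purely illustrative example of the split above (block and value
// names are hypothetical): a return-block PHI such as
//
//   %r = phi [ %a, %entry ], [ %b, %left ], [ %c, %right ]
//
// with %entry a guard block and %left/%right inside the region to be
// extracted, becomes
//
//   pre-return (extracted):  %r.in = phi [ %b, %left ], [ %c, %right ]
//   new return block:        %r    = phi [ %r.in, %pre.return ], [ %a, %entry ]
//
// so the extracted region exposes a single live-out value rather than one per
// incoming edge.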
1072
1073bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1074
1075 auto ComputeRegionCost =
1076 [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1077 InstructionCost Cost = 0;
1078 for (BasicBlock* BB : Region)
1079 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1080 return Cost;
1081 };
1082
1083 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1084
1085 if (ClonedOMRI->ORI.empty())
1086 return false;
1087
1088 // The CodeExtractor needs a dominator tree.
1089 DominatorTree DT;
1090 DT.recalculate(*ClonedFunc);
1091
1092 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1093 LoopInfo LI(DT);
1094 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1095 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1096
1097 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1098 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1099
1100 SetVector<Value *> Inputs, Outputs, Sinks;
1101 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1102 ClonedOMRI->ORI) {
1103 InstructionCost CurrentOutlinedRegionCost =
1104 ComputeRegionCost(RegionInfo.Region);
1105
1106 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1107 ClonedFuncBFI.get(), &BPI,
1108 LookupAC(*RegionInfo.EntryBlock->getParent()),
1109 /* AllowVarargs */ false);
1110
1111 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1112
1113 LLVM_DEBUG({
1114 dbgs() << "inputs: " << Inputs.size() << "\n";
1115 dbgs() << "outputs: " << Outputs.size() << "\n";
1116 for (Value *value : Inputs)
1117 dbgs() << "value used in func: " << *value << "\n";
1118 for (Value *output : Outputs)
1119 dbgs() << "instr used in func: " << *output << "\n";
1120 });
1121
1122 // Do not extract regions that have live exit variables.
1123 if (Outputs.size() > 0 && !ForceLiveExit)
1124 continue;
1125
1126 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1127 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1128 BasicBlock *OutliningCallBB = OCS->getParent();
1129 assert(OutliningCallBB->getParent() == ClonedFunc);
1130 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1131 NumColdRegionsOutlined++;
1132 OutlinedRegionCost += CurrentOutlinedRegionCost;
1133
1134 if (MarkOutlinedColdCC) {
1135 OutlinedFunc->setCallingConv(CallingConv::Cold);
1136 OCS->setCallingConv(CallingConv::Cold);
1137 }
1138 } else
1139 ORE.emit([&]() {
1140 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1141 &RegionInfo.Region.front()->front())
1142 << "Failed to extract region at block "
1143 << ore::NV("Block", RegionInfo.Region.front());
1144 });
1145 }
1146
1147 return !OutlinedFunctions.empty();
1148}
1149
1150Function *
1151PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1152 // Returns true if the block is to be partially inlined into the caller
1153 // (i.e. not to be extracted to the out-of-line function).
1154 auto ToBeInlined = [&, this](BasicBlock *BB) {
1155 return BB == ClonedOI->ReturnBlock ||
1156 llvm::is_contained(ClonedOI->Entries, BB);
1157 };
1158
1159 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1160 // The CodeExtractor needs a dominator tree.
1161 DominatorTree DT;
1162 DT.recalculate(*ClonedFunc);
1163
1164 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1165 LoopInfo LI(DT);
1166 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1167 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1168
1169 // Gather up the blocks that we're going to extract.
1170 std::vector<BasicBlock *> ToExtract;
1171 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1172 ToExtract.push_back(ClonedOI->NonReturnBlock);
1173 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1174 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1175 for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
1176 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1177 ToExtract.push_back(BB);
1178 // FIXME: the code extractor may hoist/sink more code
1179 // into the outlined function which may make the outlining
1180 // overhead (the difference of the outlined function cost
1181 // and OutliningRegionCost) look larger.
1182 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1183 }
1184
1185 // Extract the body of the if.
1186 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1187 Function *OutlinedFunc =
1188 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1189 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1190 /* AllowVarargs */ true)
1191 .extractCodeRegion(CEAC);
1192
1193 if (OutlinedFunc) {
1194 BasicBlock *OutliningCallBB =
1195 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1196 assert(OutliningCallBB->getParent() == ClonedFunc);
1197 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1198 } else
1199 ORE.emit([&]() {
1200 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1201 &ToExtract.front()->front())
1202 << "Failed to extract region at block "
1203 << ore::NV("Block", ToExtract.front());
1204 });
1205
1206 return OutlinedFunc;
1207}
1208
1209PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1210 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1211 // users (function pointers, etc.) back to the original function.
1212 ClonedFunc->replaceAllUsesWith(OrigFunc);
1213 ClonedFunc->eraseFromParent();
1214 if (!IsFunctionInlined) {
1215 // Remove each function that was speculatively created if there is no
1216 // reference.
1217 for (auto FuncBBPair : OutlinedFunctions) {
1218 Function *Func = FuncBBPair.first;
1219 Func->eraseFromParent();
1220 }
1221 }
1222}
1223
1224std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1225 if (F.hasAddressTaken())
1226 return {false, nullptr};
1227
1228 // Let inliner handle it
1229 if (F.hasFnAttribute(Attribute::AlwaysInline))
1230 return {false, nullptr};
1231
1232 if (F.hasFnAttribute(Attribute::NoInline))
1233 return {false, nullptr};
1234
1235 if (PSI.isFunctionEntryCold(&F))
1236 return {false, nullptr};
1237
1238 if (F.users().empty())
1239 return {false, nullptr};
1240
1241 OptimizationRemarkEmitter ORE(&F);
1242
1243 // Only try to outline cold regions if we have a profile summary, which
1244 // implies we have profiling information.
1245 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1246 !DisableMultiRegionPartialInline) {
1247 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1248 computeOutliningColdRegionsInfo(F, ORE);
1249 if (OMRI) {
1250 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1251
1252 LLVM_DEBUG({
1253 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1254 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1255 << "\n";
1256 });
1257
1258 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1259
1260 if (DidOutline) {
1261 LLVM_DEBUG({
1262 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1263 Cloner.ClonedFunc->print(dbgs());
1264 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1265 });
1266
1267 if (tryPartialInline(Cloner))
1268 return {true, nullptr};
1269 }
1270 }
1271 }
1272
1273 // Fall-thru to regular partial inlining if we:
1274 // i) can't find any cold regions to outline, or
1275 // ii) can't inline the outlined function anywhere.
1276 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1277 if (!OI)
1278 return {false, nullptr};
1279
1280 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1281 Cloner.normalizeReturnBlock();
1282
1283 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1284
1285 if (!OutlinedFunction)
1286 return {false, nullptr};
1287
1288 if (tryPartialInline(Cloner))
1289 return {true, OutlinedFunction};
1290
1291 return {false, nullptr};
1292}
1293
1294bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1295 if (Cloner.OutlinedFunctions.empty())
1296 return false;
1297
1298 auto OutliningCosts = computeOutliningCosts(Cloner);
1299
1300 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1301 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1302
1303 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1304 "Expected valid costs");
1305
1306 // Only calculate RelativeToEntryFreq when we are doing single region
1307 // outlining.
1308 BranchProbability RelativeToEntryFreq;
1309 if (Cloner.ClonedOI)
1310 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1311 else
1312 // RelativeToEntryFreq doesn't make sense when we have more than one
1313 // outlined call because each call will have a different relative frequency
1314 // to the entry block. We can consider using the average, but the
1315 // usefulness of that information is questionable. For now, assume we never
1316 // execute the calls to outlined functions.
1317 RelativeToEntryFreq = BranchProbability(0, 1);
1318
1319 BlockFrequency WeightedRcost =
1320 BlockFrequency(NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1321
1322 // If the call sequence(s) to the outlined function(s) are larger than the
1323 // sum of the original outlined region size(s), partial inlining does not
1324 // increase the chances of inlining the function with outlining (the inliner
1325 // uses the size increase to model the cost of inlining a callee).
1326 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1327 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1328 DebugLoc DLoc;
1329 BasicBlock *Block;
1330 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1331 OrigFuncORE.emit([&]() {
1332 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1333 DLoc, Block)
1334 << ore::NV("Function", Cloner.OrigFunc)
1335 << " not partially inlined into callers (Original Size = "
1336 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1337 << ", Size of call sequence to outlined function = "
1338 << ore::NV("NewSize", SizeCost) << ")";
1339 });
1340 return false;
1341 }
1342
1343 assert(Cloner.OrigFunc->users().empty() &&
1344 "F's users should all be replaced!");
1345
1346 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1347 Cloner.ClonedFunc->user_end());
1348
1349 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1350 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1351 if (CalleeEntryCount)
1352 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1353
1354 uint64_t CalleeEntryCountV =
1355 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1356
1357 bool AnyInline = false;
1358 for (User *User : Users) {
1359 CallBase *CB = getSupportedCallBase(User);
1360
1361 if (isLimitReached())
1362 continue;
1363
1364 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1365 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1366 continue;
1367
1368 // Construct remark before doing the inlining, as after successful inlining
1369 // the callsite is removed.
1370 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1371 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1372 << ore::NV("Caller", CB->getCaller());
1373
1374 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1375 // We can only forward varargs when we outlined a single region, else we
1376 // bail on vararg functions.
1377 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1378 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1379 : nullptr))
1380 .isSuccess())
1381 continue;
1382
1383 CallerORE.emit(OR);
1384
1385 // Now update the entry count:
1386 if (CalleeEntryCountV) {
1387 if (auto It = CallSiteToProfCountMap.find(User);
1388 It != CallSiteToProfCountMap.end()) {
1389 uint64_t CallSiteCount = It->second;
1390 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1391 }
1392 }
1393
1394 AnyInline = true;
1395 NumPartialInlining++;
1396 // Update the stats
1397 if (Cloner.ClonedOI)
1398 NumPartialInlined++;
1399 else
1400 NumColdOutlinePartialInlined++;
1401 }
1402
1403 if (AnyInline) {
1404 Cloner.IsFunctionInlined = true;
1405 if (CalleeEntryCount)
1406 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1407 CalleeEntryCountV, CalleeEntryCount->getType()));
1408 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1409 OrigFuncORE.emit([&]() {
1410 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1411 << "Partially inlined into at least one caller";
1412 });
1413 }
1414
1415 return AnyInline;
1416}
1417
1418bool PartialInlinerImpl::run(Module &M) {
1419 if (DisablePartialInlining)
1420 return false;
1421
1422 std::vector<Function *> Worklist;
1423 Worklist.reserve(M.size());
1424 for (Function &F : M)
1425 if (!F.use_empty() && !F.isDeclaration())
1426 Worklist.push_back(&F);
1427
1428 bool Changed = false;
1429 while (!Worklist.empty()) {
1430 Function *CurrFunc = Worklist.back();
1431 Worklist.pop_back();
1432
1433 if (CurrFunc->use_empty())
1434 continue;
1435
1436 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1437 if (Result.second)
1438 Worklist.push_back(Result.second);
1439 Changed |= Result.first;
1440 }
1441
1442 return Changed;
1443}
1444
1445PreservedAnalyses PartialInlinerPass::run(Module &M,
1446 ModuleAnalysisManager &AM) {
1447 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1448
1449 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1450 return FAM.getResult<AssumptionAnalysis>(F);
1451 };
1452
1453 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1454 return FAM.getCachedResult<AssumptionAnalysis>(F);
1455 };
1456
1457 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1458 return FAM.getResult<BlockFrequencyAnalysis>(F);
1459 };
1460
1461 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1462 return FAM.getResult<TargetIRAnalysis>(F);
1463 };
1464
1465 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1466 return FAM.getResult<TargetLibraryAnalysis>(F);
1467 };
1468
1469 ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1470
1471 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1472 GetTLI, PSI, GetBFI)
1473 .run(M))
1474 return PreservedAnalyses::none();
1475 return PreservedAnalyses::all();
1476}
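// A minimal way to exercise this transformation directly (assuming the usual
// pass-name registration for PartialInlinerPass) is:
//
//   opt -passes=partial-inliner input.ll -S -o output.ll
//
// optionally together with profile data so that the cold-region outlining path
// above can trigger as well.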
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
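A hypothetical helper illustrating the BasicBlock accessors listed above (getTerminator plus a dyn_cast on the terminator); this is a sketch, not code from this file:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool endsInConditionalBranch(const BasicBlock &BB) {
  // getTerminator() returns null if the block is not well formed.
  const Instruction *Term = BB.getTerminator();
  if (!Term)
    return false;
  if (const auto *BI = dyn_cast<BranchInst>(Term))
    return !BI->isUnconditional();
  return false;
}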
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
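A sketch of a getBlockProfileCount query. The comparison against the entry block's count and the 1% threshold are illustrative assumptions, not the heuristic used by this pass:

#include <optional>
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static bool looksCold(const BasicBlock &BB, const BlockFrequencyInfo &BFI) {
  std::optional<uint64_t> Count = BFI.getBlockProfileCount(&BB);
  std::optional<uint64_t> EntryCount =
      BFI.getBlockProfileCount(&BB.getParent()->getEntryBlock());
  if (!Count || !EntryCount || *EntryCount == 0)
    return false;
  // Treat a block executed less than 1% as often as the entry as cold.
  return *Count * 100 < *EntryCount;
}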
Conditional or Unconditional Branch instruction.
bool isUnconditional() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
iterator end()
Definition DenseMap.h:81
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
void setCallingConv(CallingConv::ID CC)
Definition Function.h:274
int getCost() const
Get the inline cost estimate.
Definition InlineCost.h:146
bool isAlways() const
Definition InlineCost.h:140
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition InlineCost.h:176
bool isNever() const
Definition InlineCost.h:141
bool isSuccess() const
Definition InlineCost.h:190
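How an InlineCost result is usually interpreted with the accessors above; a schematic sketch, not this pass's actual decision logic:

#include "llvm/Analysis/InlineCost.h"

using namespace llvm;

static bool shouldInline(const InlineCost &IC) {
  if (IC.isAlways())
    return true;   // e.g. alwaysinline callees
  if (IC.isNever())
    return false;  // e.g. noinline or not mechanically inlinable
  // Variable cost: a positive delta means the cost is still under the threshold.
  return IC.getCostDelta() > 0;
}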
auto map(const Function &F) const -> InstructionCost
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operators needed for arithmetic and comparisons.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
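A sketch of emitting an optimization remark through OptimizationRemarkEmitter; the pass name, remark name, and message are made up:

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static void reportInlined(OptimizationRemarkEmitter &ORE, CallBase &CB,
                          Function &Callee) {
  // The lambda is only evaluated if remarks are enabled.
  ORE.emit([&]() {
    return OptimizationRemark("my-pass", "Inlined", &CB)
           << ore::NV("Callee", &Callee) << " inlined into "
           << ore::NV("Caller", CB.getCaller());
  });
}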
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
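Creating a PHI node and wiring its incoming values with the API above; a hypothetical merge-block helper:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static PHINode *mergeValues(BasicBlock *MergeBB, Value *FromThen,
                            BasicBlock *ThenBB, Value *FromElse,
                            BasicBlock *ElseBB) {
  // Reserve space for two incoming edges and insert at the top of MergeBB.
  PHINode *PN = PHINode::Create(FromThen->getType(), /*NumReservedValues=*/2,
                                "merged", MergeBB->begin());
  PN->addIncoming(FromThen, ThenBB);
  PN->addIncoming(FromElse, ElseBB);
  return PN;
}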
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
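The PreservedAnalyses convention in a new-pass-manager module pass; a minimal hypothetical skeleton:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct MyModulePass : PassInfoMixin<MyModulePass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
    bool Changed = false;
    // ... transform M and set Changed accordingly ...
    return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
};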
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
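The usual replaceAllUsesWith pattern, as a hypothetical helper:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New); // every user of Old now refers to New
  if (Old->use_empty())         // always true after the RAUW above
    Old->eraseFromParent();
}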
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI int getInstrCost()
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
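Calling InlineFunction on a single call site with only the required arguments; a sketch in which every optional parameter keeps its default:

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

static bool tryInlineCall(CallBase &CB) {
  InlineFunctionInfo IFI;
  InlineResult IR = InlineFunction(CB, IFI);
  return IR.isSuccess(); // on failure, IR carries a textual reason
}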
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the function.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
TargetTransformInfo TTI
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instruction.
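Assembling a getInlineCost query with getInlineParams and per-function analyses; this sketch assumes a FunctionAnalysisManager is available and that the call site is a direct call:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

static InlineCost computeCost(CallBase &CB, FunctionAnalysisManager &FAM) {
  Function &Callee = *CB.getCalledFunction(); // assumes a direct call
  auto GetAC = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  TargetTransformInfo &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
  // Optional BFI/PSI/ORE arguments keep their defaults in this sketch.
  return getInlineCost(CB, getInlineParams(), CalleeTTI, GetAC, GetTLI);
}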
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1879
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2090
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
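Cloning a function with CloneFunction and a ValueToValueMapTy; the ".clone" suffix is an illustrative choice:

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

static Function *makeClone(Function *F) {
  ValueToValueMapTy VMap;                  // records the old-to-new value mapping
  Function *Copy = CloneFunction(F, VMap); // the copy is added to F's module
  Copy->setName(F->getName() + ".clone");
  return Copy;
}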
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39