90#include <system_error>
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
102 "Number of functions inlined with context sensitive profile");
104 "Number of functions not inlined with context sensitive profile");
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
109 "Number of inlined callsites with a partial distribution factor");
112 "Number of functions with FDO inline stopped due to min size limit");
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
134 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
138 cl::desc(
"Salvage unused profile by matching with new "
139 "functions on call graph."));
143 cl::desc(
"Compute and report stale profile statistical metrics."));
147 cl::desc(
"Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
152 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
158 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
164 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overridden by profile-sample-accurate. "));
169 cl::desc(
"Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
176 cl::desc(
"Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
182 cl::desc(
"Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
188 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
197 "If true, artificially skip inline transformation in sample-loader "
198 "pass, and merge (or scale) profiles (as configured by "
199 "--sample-profile-merge-inlinee)."));
204 cl::desc(
"Sort profiled recursion by edge weights."));
// Help text for the size-growth and hot-callsite tuning options of the
// priority-based sample profile loader inliner. The descriptions are shown to
// users by the command-line help, so the spelling "priority" matters here.
208 cl::desc(
"The size growth ratio limit for priority-based sample profile "
209 "loader inlining."));
213 cl::desc(
"The lower bound of size growth limit for "
214 "priority-based sample profile loader inlining."));
218 cl::desc(
"The upper bound of size growth limit for "
219 "priority-based sample profile loader inlining."));
223 cl::desc(
"Hot callsite threshold for priority-based sample profile loader "
228 cl::desc(
"Threshold for inlining cold callsites"));
234 "Relative hotness percentage threshold for indirect "
235 "call promotion in priority-based sample profile loader inlining."));
240 "Skip relative hotness check for ICP up to given number of targets."));
244 cl::desc(
"A function is considered hot for staleness error check if its "
245 "total sample count is above the specified percentile"));
249 cl::desc(
"Skip the check if the number of hot functions is smaller than "
250 "the specified number."));
254 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
258 "sample-profile-prioritized-inline",
cl::Hidden,
259 cl::desc(
"Use call site prioritized inlining for sample profile loader. "
260 "Currently only CSSPGO is supported."));
264 cl::desc(
"Use the preinliner decisions stored in profile context."));
267 "sample-profile-recursive-inline",
cl::Hidden,
268 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
272 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
277 "Optimization remarks file containing inline remarks to be replayed "
278 "by inlining from sample profile loader."),
282 "sample-profile-inline-replay-scope",
285 "Replay on functions that have remarks associated "
286 "with them (default)"),
288 "Replay on the entire module")),
289 cl::desc(
"Whether inline replay should be applied to the entire "
290 "Module or just the Functions (default) that are present as "
291 "callers in remarks during sample profile inlining."),
295 "sample-profile-inline-replay-fallback",
300 "All decisions not in replay send to original advisor (default)"),
302 "AlwaysInline",
"All decisions not in replay are inlined"),
304 "All decisions not in replay are not inlined")),
305 cl::desc(
"How sample profile inline replay treats sites that don't come "
306 "from the replay. Original: defers to original advisor, "
307 "AlwaysInline: inline all sites not in replay, NeverInline: "
308 "inline no sites not in replay"),
312 "sample-profile-inline-replay-format",
317 "<Line Number>:<Column Number>"),
319 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
321 "LineColumnDiscriminator",
322 "<Line Number>:<Column Number>.<Discriminator> (default)")),
327 cl::desc(
"Max number of promotions for a single indirect "
328 "call callsite in sample profile loader"));
332 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
336 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
337 "sample-profile inline pass name."));
// A control-flow edge, stored as an ordered pair of basic-block pointers.
// Later code in this file builds one as std::make_pair(BB, Succ), i.e.
// (block, successor).
347using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Scoped helper around a GUID-to-function-name map.
//
// The constructor inserts entries into CurrentGUIDToFuncNameMap while walking
// the functions of CurrentModule (a second insert is made when the canonical
// name differs from the original name) and then hands the map to
// SetGUIDToFuncNameMapForAll. The destructor clears the map and calls
// SetGUIDToFuncNameMapForAll(nullptr), so the map is only installed for the
// lifetime of this object.
// NOTE(review): the conditions guarding these steps are not visible in this
// excerpt -- confirm against the full source.
352class GUIDToFuncNameMapper {
357 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
361 for (
const auto &
F : CurrentModule) {
363 CurrentGUIDToFuncNameMap.insert(
374 if (CanonName != OrigName)
375 CurrentGUIDToFuncNameMap.insert(
380 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
383 ~GUIDToFuncNameMapper() {
387 CurrentGUIDToFuncNameMap.clear();
391 SetGUIDToFuncNameMapForAll(
nullptr);
// Worklist walk over FunctionSamples: each queued entry gets its
// GUIDToFuncNameMap pointer set to Map, and the samples reachable through
// getCallsiteSamples() are queued in turn.
396 std::queue<FunctionSamples *> FSToUpdate;
398 FSToUpdate.push(&IFS.second);
401 while (!FSToUpdate.empty()) {
404 FS->GUIDToFuncNameMap = Map;
405 for (
const auto &ICS : FS->getCallsiteSamples()) {
407 for (
const auto &IFS : FSMap) {
409 FSToUpdate.push(&FS);
// Record describing one call site considered for inlining by the sample
// profile loader. Other code in this file also reads a CallInstr member whose
// declaration is not visible in this excerpt.
421struct InlineCandidate {
// Profile samples for the callee at this call site.
// NOTE(review): getInlineCandidate appears to allow a null value here when
// the external inline advisor asks for inlining -- confirm.
423 const FunctionSamples *CalleeSamples;
// Sample count attributed to the call site; CandidateComparer compares this
// field first.
428 uint64_t CallsiteCount;
// Distribution factor for the call site. tryInlineCandidate treats values
// below 1 as a partial distribution.
431 float CallsiteDistribution;
// Ordering functor for InlineCandidate.
//
// Candidates whose CallsiteCount differ are ordered by that count
// (LHS.CallsiteCount < RHS.CallsiteCount). When the counts are equal, the
// callee samples of both sides are fetched for a tie-break.
// NOTE(review): the tie-break itself is not visible in this excerpt.
435struct CandidateComparer {
436 bool operator()(
const InlineCandidate &
LHS,
const InlineCandidate &
RHS) {
437 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
438 return LHS.CallsiteCount <
RHS.CallsiteCount;
440 const FunctionSamples *LCS =
LHS.CalleeSamples;
441 const FunctionSamples *RCS =
RHS.CalleeSamples;
456using CandidateQueue =
469 IntrusiveRefCntPtr<vfs::FileSystem> FS,
470 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
471 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
472 std::function<
const TargetLibraryInfo &(Function &)> GetTLI,
473 LazyCallGraph &CG,
bool DisableSampleProfileInlining,
474 bool UseFlattenedProfile)
477 GetAC(std::
move(GetAssumptionCache)),
478 GetTTI(std::
move(GetTargetTransformInfo)), GetTLI(std::
move(GetTLI)),
479 CG(CG), LTOPhase(LTOPhase),
484 DisableSampleProfileInlining(DisableSampleProfileInlining),
485 UseFlattenedProfile(UseFlattenedProfile) {}
489 ProfileSummaryInfo *_PSI);
493 bool emitAnnotations(Function &
F);
494 ErrorOr<uint64_t> getInstWeight(
const Instruction &
I)
override;
495 const FunctionSamples *findCalleeFunctionSamples(
const CallBase &
I)
const;
496 const FunctionSamples *
497 findFunctionSamples(
const Instruction &
I)
const override;
498 std::vector<const FunctionSamples *>
499 findIndirectCallFunctionSamples(
const Instruction &
I, uint64_t &Sum)
const;
500 void findExternalInlineCandidate(CallBase *CB,
const FunctionSamples *Samples,
501 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
504 bool tryPromoteAndInlineCandidate(
505 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin,
508 bool inlineHotFunctions(Function &
F,
509 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
510 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
511 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
512 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
513 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
515 tryInlineCandidate(InlineCandidate &Candidate,
518 inlineHotFunctionsWithPriority(Function &
F,
519 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
521 bool shouldInlineColdCallee(CallBase &CallInst);
522 void emitOptimizationRemarksForInlineCandidates(
523 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
525 void promoteMergeNotInlinedContextSamples(
526 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
528 std::vector<Function *> buildFunctionOrder(
Module &M, LazyCallGraph &CG);
529 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
530 void generateMDProfMetadata(Function &
F);
531 bool rejectHighStalenessProfile(
Module &M, ProfileSummaryInfo *PSI,
532 const SampleProfileMap &Profiles);
533 void removePseudoProbeInstsDiscriminator(
Module &M);
539 HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
543 HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;
545 std::function<AssumptionCache &(
Function &)> GetAC;
546 std::function<TargetTransformInfo &(
Function &)> GetTTI;
547 std::function<
const TargetLibraryInfo &(
Function &)> GetTLI;
551 std::unique_ptr<SampleContextTracker> ContextTracker;
559 const std::string AnnotatedPassName;
563 std::shared_ptr<ProfileSymbolList> PSL;
568 struct NotInlinedProfileInfo {
571 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
575 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
579 StringSet<> NamesInProfile;
584 llvm::DenseSet<uint64_t> GUIDsInProfile;
591 bool ProfAccForSymsInList;
593 bool DisableSampleProfileInlining;
595 bool UseFlattenedProfile;
598 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
601 std::unique_ptr<SampleProfileMatcher> MatchingManager;
// Returns the annotated pass name as a C string, for use as the pass name in
// optimization remarks. The pointer comes from AnnotatedPassName.c_str(), so
// it stays valid only as long as that member does.
604 const char *getAnnotatedRemarkPassName()
const {
605 return AnnotatedPassName.c_str();
612inline bool SampleProfileInference<Function>::isExit(
const BasicBlock *BB) {
617inline void SampleProfileInference<Function>::findUnlikelyJumps(
618 const std::vector<const BasicBlockT *> &BasicBlocks,
620 for (
auto &Jump :
Func.Jumps) {
621 const auto *BB = BasicBlocks[Jump.Source];
622 const auto *Succ = BasicBlocks[Jump.Target];
626 const auto &Succs = Successors[BB];
627 if (Succs.size() == 2 && Succs.back() == Succ) {
629 Jump.IsUnlikely =
true;
636 Jump.IsUnlikely =
true;
657 return getProbeWeight(Inst);
661 return std::error_code();
667 return std::error_code();
677 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
680 return getInstWeightImpl(Inst);
695const FunctionSamples *
696SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
702 StringRef CalleeName;
704 CalleeName =
Callee->getName();
707 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
709 const FunctionSamples *
FS = findFunctionSamples(Inst);
714 CalleeName, Reader->getRemapper(),
715 &FuncNameToProfNameMap);
721std::vector<const FunctionSamples *>
722SampleProfileLoader::findIndirectCallFunctionSamples(
723 const Instruction &Inst, uint64_t &Sum)
const {
725 std::vector<const FunctionSamples *>
R;
731 auto FSCompare = [](
const FunctionSamples *
L,
const FunctionSamples *
R) {
732 assert(L && R &&
"Expect non-null FunctionSamples");
733 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
734 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
735 return L->getGUID() <
R->getGUID();
740 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
741 if (CalleeSamples.empty())
747 for (
const auto *
const FS : CalleeSamples) {
748 Sum +=
FS->getHeadSamplesEstimate();
755 const FunctionSamples *
FS = findFunctionSamples(Inst);
761 if (
auto T =
FS->findCallTargetMapAt(CallSite))
762 for (
const auto &T_C : *
T)
767 for (
const auto &NameFS : *M) {
768 Sum += NameFS.second.getHeadSamplesEstimate();
769 R.push_back(&NameFS.second);
// Returns the FunctionSamples associated with the debug location of Inst.
//
// Results are stored in DILocation2SampleMap, keyed by the DILocation:
// try_emplace inserts a null placeholder, which is then filled either from
// the context tracker or from Samples->findFunctionSamples (using the
// reader's remapper and FuncNameToProfNameMap).
// NOTE(review): the checks choosing between the two sources, and whether the
// fill happens only on first insertion, are not visible here -- confirm.
776const FunctionSamples *
777SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
788 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
791 it.first->second = ContextTracker->getContextSamplesFor(DIL);
793 it.first->second = Samples->findFunctionSamples(
794 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
796 return it.first->second;
812 if (ValueData.empty())
815 unsigned NumPromoted = 0;
816 for (
const auto &V : ValueData) {
858 "If sum is 0, assume only one element in CallTargets "
859 "with count being NOMORE_ICP_MAGICNUM");
861 for (
const auto &V : ValueData)
862 ValueCountMap[V.Value] = V.Count;
868 OldSum -= Pair.first->second;
875 for (
const auto &V : ValueData) {
877 ValueCountMap[V.Value] = V.Count;
880 for (
const auto &
Data : CallTargets) {
887 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
893 for (
const auto &ValueCount : ValueCountMap) {
895 InstrProfValueData{ValueCount.first, ValueCount.second});
899 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
900 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
906 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
919bool SampleProfileLoader::tryPromoteAndInlineCandidate(
920 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
923 if (DisableSampleProfileInlining)
930 auto CalleeFunctionName = Candidate.CalleeSamples->
getFunction();
931 auto R = SymbolMap.find(CalleeFunctionName);
932 if (R == SymbolMap.end() || !
R->second)
935 auto &CI = *Candidate.CallInstr;
939 const char *Reason =
"Callee function not available";
946 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
947 R->second->hasFnAttribute(
"use-sample-profile") &&
952 Function::getGUIDAssumingExternalLinkage(
R->second->getName()),
957 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
959 Sum -= Candidate.CallsiteCount;
972 Candidate.CallInstr = DI;
974 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
979 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
987 Candidate.CallInstr->
getName())<<
" because "
993bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
998 if (Callee ==
nullptr)
1007 if (
Cost.isAlways())
1013void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1014 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
1016 for (
auto *
I : Candidates) {
1017 Function *CalledFunction =
I->getCalledFunction();
1018 if (CalledFunction) {
1019 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1020 "InlineAttempt",
I->getDebugLoc(),
1022 <<
"previous inlining reattempted for "
1023 << (
Hot ?
"hotness: '" :
"size: '")
1024 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1030void SampleProfileLoader::findExternalInlineCandidate(
1031 CallBase *CB,
const FunctionSamples *Samples,
1032 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1036 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1040 InlinedGUIDs.
insert(Function::getGUIDAssumingExternalLinkage(
1066 ContextTrieNode *
Caller = ContextTracker->getContextNodeForProfile(Samples);
1067 std::queue<ContextTrieNode *> CalleeList;
1068 CalleeList.push(Caller);
1069 while (!CalleeList.empty()) {
1070 ContextTrieNode *
Node = CalleeList.front();
1072 FunctionSamples *CalleeSample =
Node->getFunctionSamples();
1088 if (!Func ||
Func->isDeclaration())
1094 for (
const auto &TS : BS.second.getCallTargets())
1095 if (TS.second > Threshold) {
1097 if (!Callee ||
Callee->isDeclaration())
1098 InlinedGUIDs.
insert(TS.first.getHashCode());
1105 for (
auto &Child :
Node->getAllChildContext()) {
1106 ContextTrieNode *CalleeNode = &Child.second;
1107 CalleeList.push(CalleeNode);
1134bool SampleProfileLoader::inlineHotFunctions(
1135 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1138 assert((!ProfAccForSymsInList ||
1140 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1141 "ProfAccForSymsInList should be false when profile-sample-accurate "
1144 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1146 bool LocalChanged =
true;
1147 while (LocalChanged) {
1148 LocalChanged =
false;
1150 for (
auto &BB :
F) {
1154 for (
auto &
I : BB) {
1155 const FunctionSamples *
FS =
nullptr;
1158 if ((FS = findCalleeFunctionSamples(*CB))) {
1160 "GUIDToFuncNameMap has to be populated");
1162 if (
FS->getHeadSamplesEstimate() > 0 ||
1164 LocalNotInlinedCallSites.
insert({CB,
FS});
1167 else if (shouldInlineColdCallee(*CB))
1169 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1175 if (
Hot || ExternalInlineAdvisor) {
1177 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1180 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1183 for (CallBase *
I : CIS) {
1184 Function *CalledFunction =
I->getCalledFunction();
1185 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1189 if (CalledFunction == &
F)
1191 if (
I->isIndirectCall()) {
1193 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1194 uint64_t SumOrigin = Sum;
1195 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1196 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1197 PSI->getOrCompHotCountThreshold());
1203 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1204 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1205 LocalNotInlinedCallSites.
erase(
I);
1206 LocalChanged =
true;
1209 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1211 if (tryInlineCandidate(Candidate)) {
1212 LocalNotInlinedCallSites.
erase(
I);
1213 LocalChanged =
true;
1215 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1216 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1218 PSI->getOrCompHotCountThreshold());
1227 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1231bool SampleProfileLoader::tryInlineCandidate(
1235 if (DisableSampleProfileInlining)
1238 CallBase &CB = *Candidate.CallInstr;
1240 assert(CalledFunction &&
"Expect a callee with definition");
1244 InlineCost
Cost = shouldInlineCandidate(Candidate);
1245 if (
Cost.isNever()) {
1246 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1247 "InlineFail", DLoc, BB)
1248 <<
"incompatible inlining");
1255 InlineFunctionInfo IFI(GetAC);
1256 IFI.UpdateProfile =
false;
1259 if (!
IR.isSuccess())
1264 Cost,
true, getAnnotatedRemarkPassName());
1267 if (InlinedCallSites) {
1268 InlinedCallSites->
clear();
1273 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1283 if (Candidate.CallsiteDistribution < 1) {
1284 for (
auto &
I : IFI.InlinedCallSites) {
1287 Candidate.CallsiteDistribution);
1289 NumDuplicatedInlinesite++;
1295bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1297 assert(CB &&
"Expect non-null call instruction");
1303 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1306 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1310 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1311 Factor = Probe->Factor;
1313 uint64_t CallsiteCount =
1315 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
// Consults the external inline advisor, if one is set, about call site CB.
//
// The advice object is told what happened: recordUnattemptedInlining() when
// inlining is not recommended, and recordInlining() on the other path.
// NOTE(review): the returned std::optional<InlineCost> values are built on
// lines not visible in this excerpt; presumably an empty optional means "no
// advisor or no opinion" -- confirm.
1319std::optional<InlineCost>
1320SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1321 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1322 if (ExternalInlineAdvisor) {
1323 Advice = ExternalInlineAdvisor->getAdvice(CB);
1325 if (!Advice->isInliningRecommended()) {
1326 Advice->recordUnattemptedInlining();
1329 Advice->recordInlining();
// Boolean wrapper over getExternalInlineAdvisorCost(CB).
// NOTE(review): how the optional cost is turned into the bool result is not
// visible in this excerpt.
1337bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1338 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1343SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1344 if (std::optional<InlineCost> ReplayCost =
1345 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1351 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1358 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1371 GetTTI(*Callee), GetAC, GetTLI);
1374 if (
Cost.isNever() ||
Cost.isAlways())
1408bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1409 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1412 assert((!ProfAccForSymsInList ||
1414 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1415 "ProfAccForSymsInList should be false when profile-sample-accurate "
1420 CandidateQueue CQueue;
1421 InlineCandidate NewCandidate;
1422 for (
auto &BB :
F) {
1423 for (
auto &
I : BB) {
1427 if (getInlineCandidate(&NewCandidate, CB))
1428 CQueue.push(NewCandidate);
1437 "Max inline size limit should not be smaller than min inline size "
1442 if (ExternalInlineAdvisor)
1443 SizeLimit = std::numeric_limits<unsigned>::max();
1445 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1449 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1450 InlineCandidate Candidate = CQueue.top();
1452 CallBase *
I = Candidate.CallInstr;
1453 Function *CalledFunction =
I->getCalledFunction();
1455 if (CalledFunction == &
F)
1457 if (
I->isIndirectCall()) {
1459 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1460 uint64_t SumOrigin = Sum;
1461 Sum *= Candidate.CallsiteDistribution;
1462 unsigned ICPCount = 0;
1463 for (
const auto *FS : CalleeSamples) {
1465 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1466 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1467 PSI->getOrCompHotCountThreshold());
1470 uint64_t EntryCountDistributed =
1471 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1486 if (!PSI->isHotCount(EntryCountDistributed))
1491 Candidate = {
I,
FS, EntryCountDistributed,
1492 Candidate.CallsiteDistribution};
1493 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1494 &InlinedCallSites)) {
1495 for (
auto *CB : InlinedCallSites) {
1496 if (getInlineCandidate(&NewCandidate, CB))
1497 CQueue.emplace(NewCandidate);
1501 }
else if (!ContextTracker) {
1502 LocalNotInlinedCallSites.
insert({
I,
FS});
1505 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1508 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1509 for (
auto *CB : InlinedCallSites) {
1510 if (getInlineCandidate(&NewCandidate, CB))
1511 CQueue.emplace(NewCandidate);
1514 }
else if (!ContextTracker) {
1515 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1517 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1518 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1520 PSI->getOrCompHotCountThreshold());
1524 if (!CQueue.empty()) {
1526 ++NumCSInlinedHitMaxLimit;
1528 ++NumCSInlinedHitMinLimit;
1530 ++NumCSInlinedHitGrowthLimit;
1536 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
// Handles call sites that had inlinee samples in the profile but were not
// inlined in this compilation.
//
// For each (call, samples) pair with a defined callee, a "NotInline" analysis
// remark is built. The inlinee samples are then either merged into the
// callee's outlined profile (OutlineFS->merge(*FS, 1)) or their estimated
// head samples are added to the callee's entry in notInlinedCallInfo.
// NOTE(review): the condition selecting merge vs. accumulate is not visible
// in this excerpt.
// NOTE(review): NonInlinedCallSites is taken by value, so the MapVector is
// copied on every call; a const reference would avoid that, but would require
// changing the declaration as well.
1540void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1541 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1542 const Function &
F) {
1544 for (
const auto &Pair : NonInlinedCallSites) {
1545 CallBase *
I = Pair.first;
// Only callees with a body in this module are of interest.
1547 if (!Callee ||
Callee->isDeclaration())
1551 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
"NotInline",
1552 I->getDebugLoc(),
I->getParent())
1553 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1554 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1557 const FunctionSamples *
FS = Pair.second;
// Tests for samples that carry no counts at all.
1558 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
// When no head samples are recorded, the estimate is written back into the
// (otherwise const) samples via const_cast.
1572 if (
FS->getHeadSamples() == 0) {
1575 const_cast<FunctionSamples *
>(
FS)->addHeadSamples(
1576 FS->getHeadSamplesEstimate());
1581 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1585 OutlineFS = &OutlineFunctionSamples[
1587 OutlineFS->
merge(*FS, 1);
// try_emplace creates a zero-initialised record on first sight of Callee;
// the estimated head samples are then accumulated into its entryCount.
1593 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1594 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1605 InstrProfValueData{
I.first.getHashCode(),
I.second});
1612void SampleProfileLoader::generateMDProfMetadata(Function &
F) {
1615 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1616 LLVMContext &Ctx =
F.getContext();
1618 for (
auto &BI :
F) {
1621 if (BlockWeights[BB]) {
1622 for (
auto &
I : *BB) {
1629 const DILocation *DIL = DLoc;
1630 const FunctionSamples *
FS = findFunctionSamples(
I);
1634 ErrorOr<SampleRecord::CallTargetMap>
T =
1635 FS->findCallTargetMapAt(CallSite);
1636 if (!
T ||
T.get().empty())
1643 if (Probe->Factor < 1)
1650 for (
const auto &
C :
T.get())
1657 FS->findFunctionSamplesMapAt(CallSite)) {
1658 for (
const auto &NameFS : *M)
1659 Sum += NameFS.second.getHeadSamplesEstimate();
1665 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1674 for (
auto &
I : *BB) {
1677 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1694 << ((BranchLoc) ? Twine(BranchLoc.
getLine())
1695 : Twine(
"<UNKNOWN LOCATION>"))
1697 SmallVector<uint32_t, 4> Weights;
1698 uint32_t MaxWeight = 0;
1703 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1704 std::vector<uint64_t> EdgeIndex;
1709 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1710 EdgeMultiplicity[Succ]++;
1715 Edge E = std::make_pair(BB, Succ);
1716 uint64_t Weight = EdgeWeights[
E];
1721 if (Weight > std::numeric_limits<uint32_t>::max()) {
1723 Weight = std::numeric_limits<uint32_t>::max();
1728 Weights.
push_back(
static_cast<uint32_t
>(
1729 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1734 uint64_t
W = Weight / EdgeMultiplicity[Succ];
1736 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1738 Weights.
push_back(
static_cast<uint32_t
>(W));
1741 if (Weight > MaxWeight) {
1743 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1750 uint64_t TempWeight;
1759 if (MaxWeight > 0 &&
1764 return OptimizationRemark(
DEBUG_TYPE,
"PopularDest", MaxDestInst)
1765 <<
"most popular destination for conditional branches at "
1766 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
// Per-function driver for applying the sample profile to F.
//
// Visible steps, in order:
//   1. sanity checks: probe descriptor lookup and profileIsValid() (which
//      update the matched/mismatched statistics), and a getFunctionLoc(F)
//      check;
//   2. inlining through inlineHotFunctionsWithPriority or inlineHotFunctions,
//      both of which fill InlinedGUIDs;
//   3. computeAndPropagateWeights, generateMDProfMetadata and
//      emitCoverageRemarks.
// The results of the inlining and propagation steps are OR-ed into Changed.
// NOTE(review): the conditions selecting between the branches, and the early
// returns, are not visible in this excerpt.
1785bool SampleProfileLoader::emitAnnotations(Function &
F) {
1790 if (!ProbeManager->getDesc(
F))
1791 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1795 if (ProbeManager->profileIsValid(
F, *Samples)) {
1796 ++NumMatchedProfile;
1798 ++NumMismatchedProfile;
1800 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1801 <<
F.getName() <<
"\n");
1806 if (getFunctionLoc(
F) == 0)
1810 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
// GUIDs collected by the inliners below and passed on to weight propagation.
1813 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1815 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1817 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1819 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1822 generateMDProfMetadata(
F);
1824 emitCoverageRemarks(
F);
// Builds the call graph derived from the profile.
//
// The graph is constructed either from the context tracker or from the
// reader's profile map, after which the module's functions are walked and
// added through addProfiledFunction.
// NOTE(review): the branch condition and any per-function filter are not
// visible in this excerpt.
1828std::unique_ptr<ProfiledCallGraph>
1829SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1830 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1832 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1834 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1839 for (Function &
F : M) {
1842 ProfiledCG->addProfiledFunction(
// Computes the order in which the functions of M are processed.
//
// Two paths are visible:
//   * a simple path that appends the module's functions in module order and
//     returns (a warning is printed when -use-profiled-call-graph is given
//     without -sample-profile-top-down-load);
//   * a call-graph path that walks the SCCs of the profiled call graph,
//     appends their functions, and finally reverses the list.
// NOTE(review): the reversal presumably turns the SCC visitation order into a
// top-down order -- confirm against the scc_iterator contract. Other
// branches of this function are not visible in this excerpt.
1849std::vector<Function *>
1850SampleProfileLoader::buildFunctionOrder(
Module &M, LazyCallGraph &CG) {
1851 std::vector<Function *> FunctionOrderList;
// Reserve once up front; the list never holds more than the module's
// functions.
1852 FunctionOrderList.reserve(
M.size());
1855 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1856 "together with -sample-profile-top-down-load.\n";
1868 for (Function &
F : M)
1870 FunctionOrderList.push_back(&
F);
1871 return FunctionOrderList;
1924 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1925 scc_iterator<ProfiledCallGraph *> CGI =
scc_begin(ProfiledCG.get());
1930 scc_member_iterator<ProfiledCallGraph *>
SI(*CGI);
1933 for (
auto *Node :
Range) {
1936 FunctionOrderList.push_back(
F);
1940 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
// Debug dump of the final order, one function name per line.
1945 dbgs() <<
"Function processing order:\n";
1946 for (
auto F : FunctionOrderList) {
1947 dbgs() <<
F->getName() <<
"\n";
1951 return FunctionOrderList;
1954bool SampleProfileLoader::doInitialization(
Module &M,
1956 auto &Ctx =
M.getContext();
1959 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1960 if (std::error_code EC = ReaderOrErr.getError()) {
1961 std::string Msg =
"Could not open profile: " +
EC.message();
1962 Ctx.
diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1965 Reader = std::move(ReaderOrErr.get());
1966 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1969 Reader->setModule(&M);
1970 if (std::error_code EC = Reader->read()) {
1971 std::string Msg =
"profile reading failed: " +
EC.message();
1972 Ctx.
diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1976 PSL = Reader->getProfileSymbolList();
1981 if (UseFlattenedProfile)
1983 Reader->profileIsCS());
1986 ProfAccForSymsInList =
1988 if (ProfAccForSymsInList) {
1989 NamesInProfile.
clear();
1990 GUIDsInProfile.
clear();
1991 if (
auto NameTable = Reader->getNameTable()) {
1993 for (
auto Name : *NameTable)
1996 for (
auto Name : *NameTable)
2000 CoverageTracker.setProfAccForSymsInList(
true);
2005 M, *
FAM, Ctx,
nullptr,
2010 false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2014 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2015 Reader->profileIsProbeBased()) {
2031 if (Reader->profileIsPreInlined()) {
2041 if (Reader->profileIsProbeBased()) {
2048 if (!Reader->profileIsCS()) {
2060 if (Reader->profileIsCS()) {
2062 ContextTracker = std::make_unique<SampleContextTracker>(
2063 Reader->getProfiles(), &GUIDToFuncNameMap);
2067 if (Reader->profileIsProbeBased()) {
2068 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2069 if (!ProbeManager->moduleIsProbed(M)) {
2071 "Pseudo-probe-based profile requires SampleProfileProbePass";
2072 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg,
2080 MatchingManager = std::make_unique<SampleProfileMatcher>(
2081 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2082 FuncNameToProfNameMap);
// Checks how stale a probe-based profile is relative to the current module.
//
// Walks every profile in Profiles, looks up its probe descriptor by GUID and
// counts those for which ProbeManager->profileIsHashMismatched() holds. When
// the mismatch percentage reaches a threshold, a DiagnosticInfoSampleProfile
// is emitted telling the user to recollect the profile.
// NOTE(review): the hot-function filter, the threshold operands and the
// returned values are not visible in this excerpt; presumably true means
// "profile rejected" -- confirm.
2098bool SampleProfileLoader::rejectHighStalenessProfile(
2099 Module &M, ProfileSummaryInfo *PSI,
const SampleProfileMap &Profiles) {
2101 "Only support for probe-based profile");
2102 uint64_t TotalHotFunc = 0;
2103 uint64_t NumMismatchedFunc = 0;
2104 for (
const auto &
I : Profiles) {
2105 const auto &
FS =
I.second;
2106 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2112 FS.getTotalSamples()))
2116 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2117 NumMismatchedFunc++;
// Percentage comparison done in integer arithmetic: the mismatch count is
// scaled by 100 rather than dividing.
2125 if (NumMismatchedFunc * 100 >=
2127 auto &Ctx =
M.getContext();
2129 "The input profile significantly mismatches current source code. "
2130 "Please recollect profile to avoid performance regression.";
2131 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg));
// Post-annotation cleanup pass over the instructions of the module.
//
// Instructions selected for removal are first collected in InstsToDel and
// erased only after the walk, so no instruction is erased while its block is
// being iterated. Instructions with a debug location additionally have a
// discriminator value computed from it.
// NOTE(review): the selection test for deletion and the use of
// DwarfDiscriminator are not visible in this excerpt.
2137void SampleProfileLoader::removePseudoProbeInstsDiscriminator(
Module &M) {
2139 std::vector<Instruction *> InstsToDel;
2140 for (
auto &BB :
F) {
2141 for (
auto &
I : BB) {
2143 InstsToDel.push_back(&
I);
2145 if (
const DILocation *DIL =
I.getDebugLoc().get()) {
2149 std::optional<uint32_t> DwarfDiscriminator =
// Deferred erase of everything collected above.
2158 for (
auto *
I : InstsToDel)
2159 I->eraseFromParent();
2164 ProfileSummaryInfo *_PSI) {
2165 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2168 if (
M.getProfileSummary(
false) ==
nullptr) {
2169 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2175 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2178 auto Remapper = Reader->getRemapper();
2180 for (
const auto &N_F :
M.getValueSymbolTable()) {
2181 StringRef OrigName = N_F.getKey();
2183 if (
F ==
nullptr || OrigName.
empty())
2185 SymbolMap[FunctionId(OrigName)] =
F;
2187 if (OrigName != NewName && !NewName.
empty()) {
2188 auto r = SymbolMap.emplace(FunctionId(NewName),
F);
2194 r.first->second =
nullptr;
2199 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2200 if (*MapName != OrigName && !MapName->empty())
2201 SymbolMap.emplace(FunctionId(*MapName),
F);
2209 MatchingManager->runOnModule();
2210 MatchingManager->clearMatchingData();
2212 assert(SymbolMap.count(FunctionId()) == 0 &&
2213 "No empty StringRef should be added in SymbolMap");
2215 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2218 bool retval =
false;
2219 for (
auto *
F : buildFunctionOrder(M, CG)) {
2221 clearFunctionData();
2227 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2233 removePseudoProbeInstsDiscriminator(M);
2235 M.eraseNamedMetadata(FuncInfo);
2242 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2243 DILocation2SampleMap.clear();
2248 uint64_t initialEntryCount = -1;
2254 initialEntryCount = 0;
2257 ProfAccForSymsInList =
false;
2259 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2265 if (ProfAccForSymsInList) {
2267 if (PSL->contains(
F.getName()))
2268 initialEntryCount = 0;
2282 GUIDsInProfile.
count(
2283 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2285 initialEntryCount = -1;
2290 if (!
F.getEntryCount())
2292 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2299 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2300 ORE = OwnedORE.get();
2304 Samples = ContextTracker->getBaseSamplesFor(
F);
2306 Samples = Reader->getSamplesFor(
F);
2311 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2312 if (It != OutlineFunctionSamples.end()) {
2313 Samples = &It->second;
2314 }
else if (
auto Remapper = Reader->getRemapper()) {
2315 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2316 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2317 if (It != OutlineFunctionSamples.end())
2318 Samples = &It->second;
2324 if (Samples && !Samples->
empty())
2325 return emitAnnotations(
F);
2331 bool UseFlattenedProfile)
2332 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2333 LTOPhase(LTOPhase), FS(
std::
move(FS)),
2334 DisableSampleProfileInlining(DisableSampleProfileInlining),
2335 UseFlattenedProfile(UseFlattenedProfile) {}
2356 SampleProfileLoader SampleLoader(
2359 : ProfileRemappingFileName,
2360 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2361 DisableSampleProfileInlining, UseFlattenedProfile);
2362 if (!SampleLoader.doInitialization(M, &
FAM))
2366 if (!SampleLoader.runOnModule(M, &AM, PSI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
LLVM_ABI unsigned getLine() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
PostDominatorTreePtrT PDT
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Sample-based profile reader.
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
FunctionAddr VTableAddr Count
Function::ProfileCount ProfileCount
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)