#include <unordered_map>

#define DEBUG_TYPE "memprof-context-disambiguation"

          "Number of function clones created during whole program analysis");
          "Number of function clones created during ThinLTO backend");
          "Number of functions that had clones created during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
                            "cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
                         "during whole program analysis");
          "Number of not cold static allocations (possibly cloned) during "
STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
                                    "(possibly cloned) during ThinLTO backend");
          "Number of original (not cloned) allocations with memprof profiles "
          "during ThinLTO backend");
    AllocVersionsThinBackend,
    "Number of allocation versions (including clones) during ThinLTO backend");
          "Maximum number of allocation versions created for an original "
          "allocation during ThinLTO backend");
          "Number of unclonable ambiguous allocations during ThinLTO backend");
          "Number of edges removed due to mismatched callees (profiled vs IR)");
          "Number of profiled callees found via tail calls");
          "Aggregate depth of profiled callees found via tail calls");
          "Maximum depth of profiled callees found via tail calls");
          "Number of profiled callees found via multiple tail call chains");
STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");
STATISTIC(NewMergedNodes, "Number of new nodes created during merging");
STATISTIC(NonNewMergedNodes, "Number of non-new nodes used during merging");
          "Number of missing alloc nodes for context ids");
          "Number of calls skipped during cloning due to unexpected operand");
          "Number of callsites assigned to call multiple non-matching clones");
STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes");
STATISTIC(TotalMergeIters, "Number of merge iterations for nodes");
STATISTIC(MaxMergeIters, "Max merge iterations for nodes");
    cl::desc("Specify the path prefix of the MemProf dot files."));

    cl::desc("Export graph to dot files."));

    cl::desc("Iteratively apply merging on a node to catch new callers"));

    "memprof-dot-scope",
    cl::desc("Scope of graph to export to dot"),
               "Export only nodes with contexts feeding given "
               "-memprof-dot-alloc-id"),
               "Export only nodes with given -memprof-dot-context-id")));

    cl::desc("Id of alloc to export if -memprof-dot-scope=alloc "
             "or to highlight if -memprof-dot-scope=all"));

    cl::desc("Id of context to export if -memprof-dot-scope=context or to "
             "highlight otherwise"));

    cl::desc("Dump CallingContextGraph to stdout after each stage."));

    cl::desc("Perform verification checks on CallingContextGraph."));

    cl::desc("Perform frequent verification checks on nodes."));

    "memprof-import-summary",
    cl::desc("Import summary to use for testing the ThinLTO backend via opt"),

    cl::desc("Max depth to recursively search for missing "
             "frames through tail calls."));

    cl::desc("Allow cloning of callsites involved in recursive cycles"));

    cl::desc("Allow cloning of contexts through recursive cycles"));

    cl::desc("Merge clones before assigning functions"));

    cl::desc("Allow cloning of contexts having recursive cycles"));

    cl::desc("Minimum absolute count for promoted target to be inlinable"));

    cl::desc("Linking with hot/cold operator new interfaces"));

        "Require target function definition when promoting indirect calls"));
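
// CallsiteContextGraph is the core data structure below. It is a CRTP base
// class templated on the derived graph type (DerivedCCG), the function type
// (FuncTy), and the callsite type (CallTy), which lets the same graph-building
// and cloning logic run both over IR (regular LTO) and over summary index
// entries (ThinLTO).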
template <typename DerivedCCG, typename FuncTy, typename CallTy>
class CallsiteContextGraph {
  CallsiteContextGraph() = default;
  CallsiteContextGraph(const CallsiteContextGraph &) = default;
  CallsiteContextGraph(CallsiteContextGraph &&) = default;

  void identifyClones();

  bool assignFunctions();

                                       const CallsiteContextGraph &CCG) {

  friend struct GraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
  friend struct DOTGraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;

  void exportToDot(std::string Label) const;
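
  // FuncInfo identifies a function and one of its clones as a
  // (function, clone number) pair, where clone number 0 is the original.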
  struct FuncInfo final : public std::pair<FuncTy *, unsigned> {
    using Base = std::pair<FuncTy *, unsigned>;
    FuncInfo(const Base &B) : Base(B) {}
    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
    explicit operator bool() const { return this->first != nullptr; }
    FuncTy *func() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
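
  // CallInfo is the analogous handle for a single call: a (call, clone
  // number) pair that supports null-checking and printing.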
  struct CallInfo final : public std::pair<CallTy, unsigned> {
    using Base = std::pair<CallTy, unsigned>;
    CallInfo(const Base &B) : Base(B) {}
    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
        : Base(Call, CloneNo) {}
    explicit operator bool() const { return (bool)this->first; }
    CallTy call() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
    void setCloneNo(unsigned N) { this->second = N; }
    void print(raw_ostream &OS) const {
      if (!operator bool()) {
      OS << "\t(clone " << cloneNo() << ")";
    friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
    bool Recursive = false;

    uint8_t AllocTypes = 0;

    uint64_t OrigStackOrAllocId = 0;

    std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;

    std::vector<std::shared_ptr<ContextEdge>> CallerEdges;

    bool useCallerEdgesForContextInfo() const {
      assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||

    DenseSet<uint32_t> getContextIds() const {
      for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
      DenseSet<uint32_t> ContextIds;
          CalleeEdges, useCallerEdgesForContextInfo()
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges)

    uint8_t computeAllocType() const {
          (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
      uint8_t AllocType = (uint8_t)AllocationType::None;
          CalleeEdges, useCallerEdgesForContextInfo()
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges) {

    bool emptyContextIds() const {
          CalleeEdges, useCallerEdgesForContextInfo()
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges) {
        if (!Edge->getContextIds().empty())

    std::vector<ContextNode *> Clones;

    ContextNode *CloneOf = nullptr;

    ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}

    ContextNode(bool IsAllocation, CallInfo C)
        : IsAllocation(IsAllocation), Call(C) {}

    void addClone(ContextNode *Clone) {
        CloneOf->Clones.push_back(Clone);
        Clone->CloneOf = CloneOf;
        Clones.push_back(Clone);
        Clone->CloneOf = this;

    ContextNode *getOrigNode() {

                                unsigned int ContextId);

    ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
    ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
    void eraseCalleeEdge(const ContextEdge *Edge);
    void eraseCallerEdge(const ContextEdge *Edge);

    void setCall(CallInfo C) { Call = C; }

    bool hasCall() const { return (bool)Call.call(); }

    void printCall(raw_ostream &OS) const { Call.print(OS); }

    bool isRemoved() const {
             (AllocTypes == (uint8_t)AllocationType::None) ==
      return AllocTypes == (uint8_t)AllocationType::None;

    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
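
  // ContextEdge links a callee node to a caller node and records the set of
  // allocation context ids (and their combined alloc types) that flow along
  // that caller->callee step of the contexts.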
    uint8_t AllocTypes = 0;

    bool IsBackedge = false;

    DenseSet<uint32_t> ContextIds;

    ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
                DenseSet<uint32_t> ContextIds)
        : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
          ContextIds(std::move(ContextIds)) {}

    DenseSet<uint32_t> &getContextIds() { return ContextIds; }

    inline void clear() {
      AllocTypes = (uint8_t)AllocationType::None;

    inline bool isRemoved() const {
      if (Callee || Caller)
      assert(AllocTypes == (uint8_t)AllocationType::None);
      assert(ContextIds.empty());

    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
  void removeNoneTypeCalleeEdges(ContextNode *Node);
  void removeNoneTypeCallerEdges(ContextNode *Node);
  recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
                                       DenseSet<const ContextNode *> &Visited);

  template <class NodeT, class IteratorT>
  std::vector<uint64_t>
  getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);

  ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);

  template <class NodeT, class IteratorT>
  void addStackNodesForMIB(ContextNode *AllocNode,
                           CallStack<NodeT, IteratorT> &StackContext,
                           CallStack<NodeT, IteratorT> &CallsiteContext,

  void updateStackNodes();

  void handleCallsitesWithMultipleTargets();

  void markBackedges();

  bool partitionCallsByCallee(
      std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);

  MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;

  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

  DenseSet<uint32_t> DotAllocContextIds;

  using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;

  struct CallContextInfo {
    std::vector<uint64_t> StackIds;
    DenseSet<uint32_t> ContextIds;

  void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,
                           bool CalleeIter = true);

  void assignStackNodesPostOrder(
      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);

  DenseSet<uint32_t> duplicateContextIds(
      const DenseSet<uint32_t> &StackSequenceContextIds,
      DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  void propagateDuplicateContextIds(
      const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
                      DenseSet<uint32_t> RemainingContextIds);
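
  // The helpers below statically dispatch to the derived graph class via
  // CRTP (static_cast<DerivedCCG *>(this)), letting
  // ModuleCallsiteContextGraph and IndexCallsiteContextGraph provide IR- and
  // summary-specific implementations without virtual calls.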
  uint64_t getStackId(uint64_t IdOrIndex) const {
    return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);

  calleesMatch(CallTy Call, EdgeIter &EI,
               MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);

  const FuncTy *getCalleeFunc(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);

  bool calleeMatchesFunc(
      CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
      std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
        Call, Func, CallerFunc, FoundCalleeChain);

  bool sameCallee(CallTy Call1, CallTy Call2) {
    return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);

  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(

  uint64_t getLastStackId(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);

    AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
    static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);

    return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);

  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
    static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);

  FuncInfo cloneFunctionForCallsite(
      FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);

  std::string getLabel(const FuncTy *Func, const CallTy Call,
                       unsigned CloneNo) const {
    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
  ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,
                             CallInfo C = CallInfo()) {
    NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation, C));
    auto *NewNode = NodeOwner.back().get();
    NodeToCallingFunc[NewNode] = F;

  ContextNode *getNodeForInst(const CallInfo &C);
  ContextNode *getNodeForAlloc(const CallInfo &C);
  ContextNode *getNodeForStackId(uint64_t StackId);

  uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds) const;

  uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
                                  const DenseSet<uint32_t> &Node2Ids) const;

  uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
                              const DenseSet<uint32_t> &Node2Ids) const;

  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                           DenseSet<uint32_t> ContextIdsToMove = {});

  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                     ContextNode *NewCallee,
                                     bool NewClone = false,
                                     DenseSet<uint32_t> ContextIdsToMove = {});

  void moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                                 ContextNode *NewCaller);

  void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
                     DenseSet<const ContextNode *> &CurrentStack);

  mergeClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
              DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);

  void mergeNodeCalleeClones(
      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
      DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);

  void findOtherCallersToShareMerge(
      ContextNode *Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
      DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
      DenseSet<ContextNode *> &OtherCallersToShareMerge);

  void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
                      const DenseSet<uint32_t> &AllocContextIds);

  DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;

  DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;

  DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;

  MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
  MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;

  std::vector<std::unique_ptr<ContextNode>> NodeOwner;

  unsigned int LastContextId = 0;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
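
// A minimal, illustrative sketch of the CRTP dispatch used above (not part
// of this file; GraphBase/ModuleGraph are invented names for the example):
//
//   template <typename Derived> struct GraphBase {
//     uint64_t stackId(uint64_t Id) {
//       // Statically dispatches to the derived class's implementation.
//       return static_cast<Derived *>(this)->getStackId(Id);
//     }
//   };
//   struct ModuleGraph : GraphBase<ModuleGraph> {
//     // The IR-side graph can use stack ids directly (cf. getStackId below).
//     uint64_t getStackId(uint64_t Id) { return Id; }
//   };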
class ModuleCallsiteContextGraph
    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
  ModuleCallsiteContextGraph(
      llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);

  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,

  uint64_t getStackId(uint64_t IdOrIndex) const;
  bool calleeMatchesFunc(
      Instruction *Call, const Function *Func, const Function *CallerFunc,
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
  bool sameCallee(Instruction *Call1, Instruction *Call2);
  bool findProfiledCalleeThroughTailCalls(
      const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  uint64_t getLastStackId(Instruction *Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           DenseMap<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
  std::string getLabel(const Function *Func, const Instruction *Call,
                       unsigned CloneNo) const;

  llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
  IndexCall() : PointerUnion() {}
  IndexCall(std::nullptr_t) : IndexCall() {}
  IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
  IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
  IndexCall(PointerUnion PT) : PointerUnion(PT) {}

  IndexCall *operator->() { return this; }

  void print(raw_ostream &OS) const {
    PointerUnion<CallsiteInfo *, AllocInfo *> Base = *this;
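
// IndexCall wraps a PointerUnion of the two summary callsite record types so
// index-side calls can flow through the same CallTy-based interfaces as IR
// instructions; operator-> returns the wrapper itself so member syntax works
// uniformly at call sites.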
class IndexCallsiteContextGraph
    : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
  IndexCallsiteContextGraph(
      ModuleSummaryIndex &Index,

  ~IndexCallsiteContextGraph() {
    for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
      for (auto &Callsite : I.second)
        FS->addCallsite(*Callsite.second);

  friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,

  uint64_t getStackId(uint64_t IdOrIndex) const;
  const FunctionSummary *getCalleeFunc(IndexCall &Call);
  bool calleeMatchesFunc(
      IndexCall &Call, const FunctionSummary *Func,
      const FunctionSummary *CallerFunc,
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
  bool sameCallee(IndexCall &Call1, IndexCall &Call2);
  bool findProfiledCalleeThroughTailCalls(
      ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  uint64_t getLastStackId(IndexCall &Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                       IndexCall>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           DenseMap<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
  std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
                       unsigned CloneNo) const;

  std::map<const FunctionSummary *, ValueInfo> FSToVIMap;

  const ModuleSummaryIndex &Index;

  std::unordered_map<FunctionSummary *,
                     std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
      FunctionCalleesToSynthesizedCallsiteInfos;

    : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
    : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatch(
    const std::vector<uint8_t> &InAllocTypes,
    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
  assert(InAllocTypes.size() == Edges.size());
      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
          const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
        if (l == (uint8_t)AllocationType::None ||
            r->AllocTypes == (uint8_t)AllocationType::None)
        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
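
// Note the None special case above: an edge whose alloc type is None carries
// no contexts that would constrain the match, so it is compatible with any
// alloc type on the other side.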
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatchClone(
    const std::vector<uint8_t> &InAllocTypes,
    const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
  const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
  assert(InAllocTypes.size() == Node->CalleeEdges.size());
  for (const auto &E : Clone->CalleeEdges) {
    EdgeCalleeMap[E->Callee] = E->AllocTypes;
  for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
    auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
    if (Iter == EdgeCalleeMap.end())
    if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
    const CallInfo &C) {
  ContextNode *Node = getNodeForAlloc(C);
  return NonAllocationCallToContextNodeMap.lookup(C);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
    const CallInfo &C) {
  return AllocationCallToContextNodeMap.lookup(C);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
  auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
  if (StackEntryNode != StackEntryIdToContextNodeMap.end())
    return StackEntryNode->second;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
                          unsigned int ContextId) {
  for (auto &Edge : CallerEdges) {
    if (Edge->Caller == Caller) {
      Edge->getContextIds().insert(ContextId);
  std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
      this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
  CallerEdges.push_back(Edge);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
    ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {
  auto CalleeCallerCount = Callee->CallerEdges.size();
  auto CallerCalleeCount = Caller->CalleeEdges.size();
  } else if (CalleeIter) {
    *EI = Caller->CalleeEdges.erase(*EI);
    *EI = Callee->CallerEdges.erase(*EI);
  assert(Callee->CallerEdges.size() < CalleeCallerCount);
  assert(Caller->CalleeEdges.size() < CallerCalleeCount);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      Edge->Caller->eraseCalleeEdge(Edge.get());
      EI = Node->CallerEdges.erase(EI);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCallee(const ContextNode *Callee) {
  for (const auto &Edge : CalleeEdges)
    if (Edge->Callee == Callee)

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCaller(const ContextNode *Caller) {
  for (const auto &Edge : CallerEdges)
    if (Edge->Caller == Caller)

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCalleeEdge(const ContextEdge *Edge) {
      CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
        return CalleeEdge.get() == Edge;
  assert(EI != CalleeEdges.end());
  CalleeEdges.erase(EI);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCallerEdge(const ContextEdge *Edge) {
      CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
        return CallerEdge.get() == Edge;
  assert(EI != CallerEdges.end());
  CallerEdges.erase(EI);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
    DenseSet<uint32_t> &ContextIds) const {
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : ContextIds) {
    AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);

template <typename DerivedCCG, typename FuncTy, typename CallTy>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
    const DenseSet<uint32_t> &Node1Ids,
    const DenseSet<uint32_t> &Node2Ids) const {
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : Node1Ids) {
    if (!Node2Ids.count(Id))
    AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
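
// Dispatches to intersectAllocTypesImpl with the smaller id set as the first
// (iterated) argument, so the intersection walk visits fewer elements.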
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
    const DenseSet<uint32_t> &Node1Ids,
    const DenseSet<uint32_t> &Node2Ids) const {
  if (Node1Ids.size() < Node2Ids.size())
    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
  return intersectAllocTypesImpl(Node2Ids, Node1Ids);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
    CallInfo Call, const FuncTy *F) {
  ContextNode *AllocNode = createNewNode(/*IsAllocation=*/true, F, Call);
  AllocationCallToContextNodeMap[Call] = AllocNode;
  AllocNode->OrigStackOrAllocId = LastContextId;
  AllocNode->AllocTypes = (uint8_t)AllocationType::None;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
    ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
  ContextIdToAllocationType[++LastContextId] = AllocType;
  if (!ContextSizeInfo.empty()) {
    auto &Entry = ContextIdToContextSizeInfos[LastContextId];
  AllocNode->AllocTypes |= (uint8_t)AllocType;
  ContextNode *PrevNode = AllocNode;
  SmallSet<uint64_t, 8> StackIdSet;
       ContextIter != StackContext.end(); ++ContextIter) {
    auto StackId = getStackId(*ContextIter);
    ContextNode *StackNode = getNodeForStackId(StackId);
      StackNode = createNewNode(/*IsAllocation=*/false);
      StackEntryIdToContextNodeMap[StackId] = StackNode;
      StackNode->OrigStackOrAllocId = StackId;
      StackNode->Recursive = true;
    StackNode->AllocTypes |= (uint8_t)AllocType;
    PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
    PrevNode = StackNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
    const DenseSet<uint32_t> &StackSequenceContextIds,
    DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  DenseSet<uint32_t> NewContextIds;
  for (auto OldId : StackSequenceContextIds) {
    NewContextIds.insert(++LastContextId);
    OldToNewContextIds[OldId].insert(LastContextId);
    assert(ContextIdToAllocationType.count(OldId));
    ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
    if (DotAllocContextIds.contains(OldId))
      DotAllocContextIds.insert(LastContextId);
  return NewContextIds;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    propagateDuplicateContextIds(
        const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
    DenseSet<uint32_t> NewIds;
    for (auto Id : ContextIds)
      if (auto NewId = OldToNewContextIds.find(Id);
          NewId != OldToNewContextIds.end())
  auto UpdateCallers = [&](ContextNode *Node,
                           DenseSet<const ContextEdge *> &Visited,
                           auto &&UpdateCallers) -> void {
    for (const auto &Edge : Node->CallerEdges) {
      ContextNode *NextNode = Edge->Caller;
      DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
      if (!NewIdsToAdd.empty()) {
        Edge->getContextIds().insert_range(NewIdsToAdd);
        UpdateCallers(NextNode, Visited, UpdateCallers);
  DenseSet<const ContextEdge *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    UpdateCallers(Node, Visited, UpdateCallers);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
    ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
    DenseSet<uint32_t> RemainingContextIds) {
      TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
  DenseSet<uint32_t> RecursiveContextIds;
  DenseSet<uint32_t> AllCallerContextIds;
  for (auto &CE : OrigEdges) {
    AllCallerContextIds.reserve(CE->getContextIds().size());
    for (auto Id : CE->getContextIds())
      if (!AllCallerContextIds.insert(Id).second)
        RecursiveContextIds.insert(Id);
  for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
    DenseSet<uint32_t> NewEdgeContextIds;
    DenseSet<uint32_t> NotFoundContextIds;
    set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
                 NotFoundContextIds);
    if (RecursiveContextIds.empty()) {
      RemainingContextIds.swap(NotFoundContextIds);
      DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
      set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
    if (NewEdgeContextIds.empty()) {
    if (TowardsCallee) {
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CalleeEdges.push_back(NewEdge);
      NewEdge->Callee->CallerEdges.push_back(NewEdge);
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CallerEdges.push_back(NewEdge);
      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
    if (Edge->getContextIds().empty()) {
      removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
  assert(!Edge->ContextIds.empty());

template <typename DerivedCCG, typename FuncTy, typename CallTy>
               bool CheckEdges = true) {
  if (Node->isRemoved())

  auto NodeContextIds = Node->getContextIds();
  if (Node->CallerEdges.size()) {
        Node->CallerEdges.front()->ContextIds);
      set_union(CallerEdgeContextIds, Edge->ContextIds);
           NodeContextIds == CallerEdgeContextIds ||
  if (Node->CalleeEdges.size()) {
        Node->CalleeEdges.front()->ContextIds);
      set_union(CalleeEdgeContextIds, Edge->getContextIds());
           NodeContextIds == CalleeEdgeContextIds);
  for (const auto &E : Node->CalleeEdges)
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    assignStackNodesPostOrder(
        ContextNode *Node, DenseSet<const ContextNode *> &Visited,
        DenseMap<uint64_t, std::vector<CallContextInfo>>
            &StackIdToMatchingCalls,
        DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    if (Edge->isRemoved()) {
    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall);
  if (Node->IsAllocation ||
      !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
  auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
  if (Calls.size() == 1) {
    auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
    if (Ids.size() == 1) {
      assert(SavedContextIds.empty());
      assert(Node == getNodeForStackId(Ids[0]));
      if (Node->Recursive)
      NonAllocationCallToContextNodeMap[Call] = Node;
  uint64_t LastId = Node->OrigStackOrAllocId;
  ContextNode *LastNode = getNodeForStackId(LastId);
  assert(LastNode == Node);
  ContextNode *LastNode = Node;
  DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
  [[maybe_unused]] bool PrevIterCreatedNode = false;
  bool CreatedNode = false;
  for (unsigned I = 0; I < Calls.size();
       I++, PrevIterCreatedNode = CreatedNode) {
    CreatedNode = false;
    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
    if (SavedContextIds.empty()) {
      auto MatchingCall = CallToMatchingCall[Call];
      if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
        assert(I > 0 && !PrevIterCreatedNode);
      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
    assert(LastId == Ids.back());
    ContextNode *PrevNode = LastNode;
    for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
      ContextNode *CurNode = getNodeForStackId(Id);
      assert(!CurNode->Recursive);
      auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
    if (SavedContextIds.empty()) {
    ContextNode *NewNode = createNewNode(/*IsAllocation=*/false, Func, Call);
    NonAllocationCallToContextNodeMap[Call] = NewNode;
    NewNode->AllocTypes = computeAllocType(SavedContextIds);
    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);
    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
        set_subtract(PrevEdge->getContextIds(), SavedContextIds);
        if (PrevEdge->getContextIds().empty())
          removeEdgeFromGraph(PrevEdge);
      CurNode->AllocTypes = CurNode->CalleeEdges.empty()
                                ? (uint8_t)AllocationType::None
                                : CurNode->computeAllocType();
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
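
// Matches calls whose callsite metadata/summary spans multiple stack frames
// to sequences of stack nodes, duplicating and propagating context ids where
// the same trailing stack id is shared by calls in different functions, then
// invokes assignStackNodesPostOrder from each allocation node.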
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    for (auto &Call : CallsWithMetadata) {
      if (AllocationCallToContextNodeMap.count(Call))
      auto StackIdsWithContextNodes =
          getStackIdsWithContextNodesForCall(Call.call());
      if (StackIdsWithContextNodes.empty())
      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
          {Call.call(), StackIdsWithContextNodes, Func, {}});
  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
  DenseMap<CallInfo, CallInfo> CallToMatchingCall;
  for (auto &It : StackIdToMatchingCalls) {
    auto &Calls = It.getSecond();
    if (Calls.size() == 1) {
      auto &Ids = Calls[0].StackIds;
      if (Ids.size() == 1)
    DenseMap<const FuncTy *, unsigned> FuncToIndex;
    for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
      FuncToIndex.insert({CallCtxInfo.Func, Idx});
        [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
          return A.StackIds.size() > B.StackIds.size() ||
                 (A.StackIds.size() == B.StackIds.size() &&
                  (A.StackIds < B.StackIds ||
                   (A.StackIds == B.StackIds &&
                    FuncToIndex[A.Func] < FuncToIndex[B.Func])));
    uint64_t LastId = It.getFirst();
    ContextNode *LastNode = getNodeForStackId(LastId);
    if (LastNode->Recursive)
    DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
    DenseSet<const FuncTy *> MatchingIdsFuncSet;
    for (unsigned I = 0; I < Calls.size(); I++) {
      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
      assert(SavedContextIds.empty());
      assert(LastId == Ids.back());
      if (I > 0 && Ids != Calls[I - 1].StackIds)
        MatchingIdsFuncSet.clear();
      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
      ContextNode *PrevNode = LastNode;
      ContextNode *CurNode = LastNode;
      for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
        CurNode = getNodeForStackId(Id);
        if (CurNode->Recursive) {
        auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
      if (StackSequenceContextIds.empty()) {
      if (Ids.back() != getLastStackId(Call)) {
        for (const auto &PE : LastNode->CallerEdges) {
          set_subtract(StackSequenceContextIds, PE->getContextIds());
          if (StackSequenceContextIds.empty())
        if (StackSequenceContextIds.empty())
      MatchingIdsFuncSet.insert(Func);
      bool DuplicateContextIds = false;
      for (unsigned J = I + 1; J < Calls.size(); J++) {
        auto &CallCtxInfo = Calls[J];
        auto &NextIds = CallCtxInfo.StackIds;
        auto *NextFunc = CallCtxInfo.Func;
        if (NextFunc != Func) {
          DuplicateContextIds = true;
        auto &NextCall = CallCtxInfo.Call;
        CallToMatchingCall[NextCall] = Call;
      OldToNewContextIds.reserve(OldToNewContextIds.size() +
                                 StackSequenceContextIds.size());
              ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
              : StackSequenceContextIds;
      assert(!SavedContextIds.empty());
      if (!DuplicateContextIds) {
        set_subtract(LastNodeContextIds, StackSequenceContextIds);
        if (LastNodeContextIds.empty())
  propagateDuplicateContextIds(OldToNewContextIds);
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap)
    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall);
uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
  return CallsiteContext.back();

uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
  return Index.getStackIdAtIndex(CallsiteContext.back());

  auto Pos = F.getName().find_last_of('.');
  bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);

std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                 const Instruction *Call,
                                                 unsigned CloneNo) const {

std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
                                                const IndexCall &Call,
                                                unsigned CloneNo) const {
  auto VI = FSToVIMap.find(Func);
  assert(VI != FSToVIMap.end());
    return CallerName + " -> alloc";
  return CallerName + " -> " +
             Callsite->Clones[CloneNo]);
std::vector<uint64_t>
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
    Instruction *Call) {
  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
  return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(

std::vector<uint64_t>
IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
  return getStackIdsWithContextNodes<CallsiteInfo,
                                     SmallVector<unsigned>::const_iterator>(

template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
std::vector<uint64_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
    CallStack<NodeT, IteratorT> &CallsiteContext) {
  std::vector<uint64_t> StackIds;
  for (auto IdOrIndex : CallsiteContext) {
    auto StackId = getStackId(IdOrIndex);
    ContextNode *Node = getNodeForStackId(StackId);
    StackIds.push_back(StackId);
ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
    : Mod(M), OREGetter(OREGetter) {
    std::vector<CallInfo> CallsWithMetadata;
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
          CallsWithMetadata.push_back(&I);
          auto *AllocNode = addAllocNode(&I, &F);
          auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
          for (auto &MDOp : MemProfMD->operands()) {
            std::vector<ContextTotalSize> ContextSizeInfo;
            if (MIBMD->getNumOperands() > 2) {
              for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
                MDNode *ContextSizePair =
                ContextSizeInfo.push_back({FullStackId, TotalSize});
            addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                AllocNode, StackContext, CallsiteContext,
            DotAllocContextIds = AllocNode->getContextIds();
          I.setMetadata(LLVMContext::MD_memprof, nullptr);
          I.setMetadata(LLVMContext::MD_callsite, nullptr);
        } else if (I.getMetadata(LLVMContext::MD_callsite)) {
          CallsWithMetadata.push_back(&I);
    if (!CallsWithMetadata.empty())
      FuncToCallsWithMetadata[&F] = CallsWithMetadata;

    dbgs() << "CCG before updating call stack chains:\n";

    exportToDot("prestackupdate");

    exportToDot("poststackupdate");

  handleCallsitesWithMultipleTargets();

  for (auto &FuncEntry : FuncToCallsWithMetadata)
    for (auto &Call : FuncEntry.second)
      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
    : Index(Index), isPrevailing(isPrevailing) {
  for (auto &I : Index) {
    auto VI = Index.getValueInfo(I);
    for (auto &S : VI.getSummaryList()) {
          !isPrevailing(VI.getGUID(), S.get()))
      std::vector<CallInfo> CallsWithMetadata;
      if (!FS->allocs().empty()) {
        for (auto &AN : FS->mutableAllocs()) {
          if (AN.MIBs.empty())
          IndexCall AllocCall(&AN);
          CallsWithMetadata.push_back(AllocCall);
          auto *AllocNode = addAllocNode(AllocCall, FS);
                 AN.ContextSizeInfos.size() == AN.MIBs.size());
          for (auto &MIB : AN.MIBs) {
            std::vector<ContextTotalSize> ContextSizeInfo;
            if (!AN.ContextSizeInfos.empty()) {
              for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I])
                ContextSizeInfo.push_back({FullStackId, TotalSize});
            addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
                AllocNode, StackContext, EmptyContext, MIB.AllocType,
            DotAllocContextIds = AllocNode->getContextIds();
          AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
      if (!FS->callsites().empty())
        for (auto &SN : FS->mutableCallsites()) {
          IndexCall StackNodeCall(&SN);
          CallsWithMetadata.push_back(StackNodeCall);
      if (!CallsWithMetadata.empty())
        FuncToCallsWithMetadata[FS] = CallsWithMetadata;
      if (!FS->allocs().empty() || !FS->callsites().empty())

    dbgs() << "CCG before updating call stack chains:\n";

    exportToDot("prestackupdate");

    exportToDot("poststackupdate");

  handleCallsitesWithMultipleTargets();
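
// For callsite nodes whose call matches multiple possible targets, try each
// candidate call against the node's callee edges (partitioning by callee
// first); calls that match no profiled callee get their node's call cleared
// and the mismatched edges removed.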
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::handleCallsitesWithMultipleTargets() {
  std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
  for (auto &Entry : NonAllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    std::vector<CallInfo> AllCalls;
    AllCalls.reserve(Node->MatchingCalls.size() + 1);
    AllCalls.push_back(Node->Call);
    if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
    auto It = AllCalls.begin();
    for (; It != AllCalls.end(); ++It) {
      for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
        if (!Edge->Callee->hasCall())
        assert(NodeToCallingFunc.count(Edge->Callee));
        if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
      if (Node->Call != ThisCall) {
        Node->setCall(ThisCall);
      Node->MatchingCalls.clear();
    if (It == AllCalls.end()) {
      RemovedEdgesWithMismatchedCallees++;
      Node->setCall(CallInfo());
    for (++It; It != AllCalls.end(); ++It) {
      Node->MatchingCalls.push_back(ThisCall);
  NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
    return !it.second->hasCall() || it.second->Call != it.first;
  for (auto &[Call, Node] : NewCallToNode)
    NonAllocationCallToContextNodeMap[Call] = Node;
  for (auto &[Call, Node] : TailCallToContextNodeMap)
    NonAllocationCallToContextNodeMap[Call] = Node;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
    std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
  struct CallsWithSameCallee {
    std::vector<CallInfo> Calls;
    ContextNode *Node = nullptr;
  for (auto ThisCall : AllCalls) {
    auto *F = getCalleeFunc(ThisCall.call());
      CalleeFuncToCallInfo[F].Calls.push_back(ThisCall);
  for (const auto &Edge : Node->CalleeEdges) {
    if (!Edge->Callee->hasCall())
    const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
    if (CalleeFuncToCallInfo.contains(ProfiledCalleeFunc))
      CalleeNodeToCallInfo[Edge->Callee] =
          &CalleeFuncToCallInfo[ProfiledCalleeFunc];
  if (CalleeNodeToCallInfo.empty())
  ContextNode *UnmatchedCalleesNode = nullptr;
  bool UsedOrigNode = false;
  auto CalleeEdges = Node->CalleeEdges;
  for (auto &Edge : CalleeEdges) {
    if (!Edge->Callee->hasCall())
    ContextNode *CallerNodeToUse = nullptr;
    if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
      if (!UnmatchedCalleesNode)
        UnmatchedCalleesNode =
            createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
      CallerNodeToUse = UnmatchedCalleesNode;
      auto *Info = CalleeNodeToCallInfo[Edge->Callee];
      if (!UsedOrigNode) {
        Node->MatchingCalls.clear();
        UsedOrigNode = true;
            createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
        Info->Node->setCall(Info->Calls.front());
      NewCallToNode.push_back({Info->Node->Call, Info->Node});
      CallerNodeToUse = Info->Node;
    if (CallerNodeToUse == Node)
    moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
  for (auto &I : CalleeNodeToCallInfo)
    removeNoneTypeCallerEdges(I.second->Node);
  if (UnmatchedCalleesNode)
    removeNoneTypeCallerEdges(UnmatchedCalleesNode);
  removeNoneTypeCallerEdges(Node);
uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
  return Index.getStackIdAtIndex(IdOrIndex);
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
    CallTy Call, EdgeIter &EI,
    MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
  const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
  const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
  std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
  if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
  if (FoundCalleeChain.empty())
    auto *CurEdge = Callee->findEdgeFromCaller(Caller);
      CurEdge->ContextIds.insert_range(Edge->ContextIds);
      CurEdge->AllocTypes |= Edge->AllocTypes;
    auto NewEdge = std::make_shared<ContextEdge>(
        Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
    Callee->CallerEdges.push_back(NewEdge);
    if (Caller == Edge->Caller) {
      EI = Caller->CalleeEdges.insert(EI, NewEdge);
             "Iterator position not restored after insert and increment");
      Caller->CalleeEdges.push_back(NewEdge);
  auto *CurCalleeNode = Edge->Callee;
  for (auto &[NewCall, Func] : FoundCalleeChain) {
    ContextNode *NewNode = nullptr;
    if (TailCallToContextNodeMap.count(NewCall)) {
      NewNode = TailCallToContextNodeMap[NewCall];
      NewNode->AllocTypes |= Edge->AllocTypes;
      FuncToCallsWithMetadata[Func].push_back({NewCall});
      NewNode = createNewNode(/*IsAllocation=*/false, Func, NewCall);
      TailCallToContextNodeMap[NewCall] = NewNode;
      NewNode->AllocTypes = Edge->AllocTypes;
    AddEdge(NewNode, CurCalleeNode);
    CurCalleeNode = NewNode;
  AddEdge(Edge->Caller, CurCalleeNode);
  removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);
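
// Walks from a callsite's static callee through chains of tail calls, up to
// the configured search depth, looking for a unique path that reaches the
// profiled callee; discovering a second path sets FoundMultipleCalleeChains
// and the match is rejected.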
bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
    const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
    std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
    bool &FoundMultipleCalleeChains) {
    FoundCalleeChain.push_back({Callsite, F});
  bool FoundSingleCalleeChain = false;
  for (auto &BB : *CalleeFunc) {
    for (auto &I : BB) {
      if (!CB || !CB->isTailCall())
      auto *CalledValue = CB->getCalledOperand();
      auto *CalledFunction = CB->getCalledFunction();
      if (CalledValue && !CalledFunction) {
        CalledValue = CalledValue->stripPointerCasts();
        assert(!CalledFunction &&
               "Expected null called function in callsite for alias");
      if (!CalledFunction)
      if (CalledFunction == ProfiledCallee) {
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
        FoundSingleCalleeChain = true;
        FoundProfiledCalleeCount++;
        FoundProfiledCalleeDepth += Depth;
        if (Depth > FoundProfiledCalleeMaxDepth)
          FoundProfiledCalleeMaxDepth = Depth;
        SaveCallsiteInfo(&I, CalleeFunc);
      } else if (findProfiledCalleeThroughTailCalls(
                     ProfiledCallee, CalledFunction, Depth + 1,
                     FoundCalleeChain, FoundMultipleCalleeChains)) {
        assert(!FoundMultipleCalleeChains);
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
        FoundSingleCalleeChain = true;
        SaveCallsiteInfo(&I, CalleeFunc);
      } else if (FoundMultipleCalleeChains)
  return FoundSingleCalleeChain;
const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *Call) {
  if (!CB->getCalledOperand() || CB->isIndirectCall())
  auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();

bool ModuleCallsiteContextGraph::calleeMatchesFunc(
    Instruction *Call, const Function *Func, const Function *CallerFunc,
    std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
  if (!CB->getCalledOperand() || CB->isIndirectCall())
  auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
  if (CalleeFunc == Func)
  if (Alias && Alias->getAliasee() == Func)
  bool FoundMultipleCalleeChains = false;
  if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
                                          FoundMultipleCalleeChains)) {
    LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
                      << Func->getName() << " from " << CallerFunc->getName()
                      << " that actually called " << CalleeVal->getName()
                      << (FoundMultipleCalleeChains
                              ? " (found multiple possible chains)"
    if (FoundMultipleCalleeChains)
      FoundProfiledCalleeNonUniquelyCount++;

bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
                                            Instruction *Call2) {
  if (!CB1->getCalledOperand() || CB1->isIndirectCall())
  auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
  if (!CB2->getCalledOperand() || CB2->isIndirectCall())
  auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
  return CalleeFunc1 == CalleeFunc2;
bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
    ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
    std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
    bool &FoundMultipleCalleeChains) {
  auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
    if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
        !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
      FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
          std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
    CallsiteInfo *NewCallsiteInfo =
        FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
    FoundCalleeChain.push_back({NewCallsiteInfo, FS});
  bool FoundSingleCalleeChain = false;
        !isPrevailing(CurCallee.getGUID(), S.get()))
    auto FSVI = CurCallee;
      FSVI = AS->getAliaseeVI();
    for (auto &CallEdge : FS->calls()) {
      if (!CallEdge.second.hasTailCall())
      if (CallEdge.first == ProfiledCallee) {
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
        FoundSingleCalleeChain = true;
        FoundProfiledCalleeCount++;
        FoundProfiledCalleeDepth += Depth;
        if (Depth > FoundProfiledCalleeMaxDepth)
          FoundProfiledCalleeMaxDepth = Depth;
        CreateAndSaveCallsiteInfo(CallEdge.first, FS);
        assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
        FSToVIMap[FS] = FSVI;
      } else if (findProfiledCalleeThroughTailCalls(
                     ProfiledCallee, CallEdge.first, Depth + 1,
                     FoundCalleeChain, FoundMultipleCalleeChains)) {
        assert(!FoundMultipleCalleeChains);
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
        FoundSingleCalleeChain = true;
        CreateAndSaveCallsiteInfo(CallEdge.first, FS);
        assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
        FSToVIMap[FS] = FSVI;
      } else if (FoundMultipleCalleeChains)
  return FoundSingleCalleeChain;
const FunctionSummary *
IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
  if (Callee.getSummaryList().empty())

bool IndexCallsiteContextGraph::calleeMatchesFunc(
    IndexCall &Call, const FunctionSummary *Func,
    const FunctionSummary *CallerFunc,
    std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
  AliasSummary *Alias =
      Callee.getSummaryList().empty()
  assert(FSToVIMap.count(Func));
  auto FuncVI = FSToVIMap[Func];
  if (Callee == FuncVI ||
  bool FoundMultipleCalleeChains = false;
  if (!findProfiledCalleeThroughTailCalls(
          FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
    LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
                      << " from " << FSToVIMap[CallerFunc]
                      << " that actually called " << Callee
                      << (FoundMultipleCalleeChains
                              ? " (found multiple possible chains)"
    if (FoundMultipleCalleeChains)
      FoundProfiledCalleeNonUniquelyCount++;

bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
  return Callee1 == Callee2;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
    raw_ostream &OS) const {
  OS << "Node " << this << "\n";
    OS << " (recursive)";
  if (!MatchingCalls.empty()) {
    OS << "\tMatchingCalls:\n";
    for (auto &MatchingCall : MatchingCalls) {
      MatchingCall.print(OS);
  OS << "\tContextIds:";
  auto ContextIds = getContextIds();
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id : SortedIds)
  OS << "\tCalleeEdges:\n";
  for (auto &Edge : CalleeEdges)
    OS << "\t\t" << *Edge << "\n";
  OS << "\tCallerEdges:\n";
  for (auto &Edge : CallerEdges)
    OS << "\t\t" << *Edge << "\n";
  if (!Clones.empty()) {
  } else if (CloneOf) {
    OS << "\tClone of " << CloneOf << "\n";
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
    raw_ostream &OS) const {
  OS << "Edge from Callee " << Callee << " to Caller: " << Caller
     << (IsBackedge ? " (BE)" : "")
  OS << " ContextIds:";
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id : SortedIds)
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
  print(dbgs());
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
    raw_ostream &OS) const {
  OS << "Callsite Context Graph:\n";
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    if (Node->isRemoved())
      continue;
    Node->print(OS);
    OS << "\n";
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
    raw_ostream &OS) const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    if (Node->isRemoved())
      continue;
    if (!Node->IsAllocation)
      continue;
    DenseSet<uint32_t> ContextIds = Node->getContextIds();
    auto AllocTypeFromCall = getAllocationCallType(Node->Call);
    std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
    std::sort(SortedIds.begin(), SortedIds.end());
    for (auto Id : SortedIds) {
      auto TypeI = ContextIdToAllocationType.find(Id);
      assert(TypeI != ContextIdToAllocationType.end());
      auto CSI = ContextIdToContextSizeInfos.find(Id);
      if (CSI != ContextIdToContextSizeInfos.end()) {
        for (auto &Info : CSI->second) {
          OS << "MemProf hinting: "
             << getAllocTypeString((uint8_t)TypeI->second)
             << " full allocation context " << Info.FullStackId
             << " with total size " << Info.TotalSize << " is "
             << getAllocTypeString(Node->AllocTypes) << " after cloning";
          if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
            OS << " marked " << getAllocTypeString((uint8_t)AllocTypeFromCall)
               << " due to cold byte percent";
          // Print the internal context id to aid debugging and visualization.
          OS << " (context id " << Id << ")";
          OS << "\n";
        }
      }
    }
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
    for (auto &Edge : Node->CallerEdges)
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
  using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
  // ... (node iterator plumbing over the graph's NodeOwner list elided in
  // this listing)
  static NodeRef getEntryNode(GraphType G) {
    return G->NodeOwner.begin()->get();
  }
  using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
  static const ContextNode<DerivedCCG, FuncTy, CallTy> *
  GetCallee(const EdgePtrTy &P) {
    return P->Callee;
  }
  // ... (child iterator definitions over the callee edges elided)
};
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
    : public DefaultDOTGraphTraits {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
  // ...

  static std::string getNodeLabel(NodeRef Node, GraphType G) {
    std::string LabelString =
        (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
         Twine(Node->OrigStackOrAllocId))
            .str();
    LabelString += "\n";
    if (Node->hasCall()) {
      auto Func = G->NodeToCallingFunc.find(Node);
      assert(Func != G->NodeToCallingFunc.end());
      LabelString +=
          G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
    } else {
      LabelString += "null call";
      if (Node->Recursive)
        LabelString += " (recursive)";
      else
        LabelString += " (external)";
    }
    return LabelString;
  }

  static std::string getNodeAttributes(NodeRef Node, GraphType G) {
    auto ContextIds = Node->getContextIds();
    // Highlight the node if it carries the alloc or context id requested via
    // -memprof-dot-alloc-id / -memprof-dot-context-id.
    bool Highlight = false;
    // ...
    std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
                                   getContextIds(ContextIds) + "\"")
                                      .str();
    // Default fontsize is 14.
    if (Highlight)
      AttributeString += ",fontsize=\"30\"";
    AttributeString +=
        (Twine(",fillcolor=\"") + getColor(Node->AllocTypes, Highlight) + "\"")
            .str();
    if (Node->CloneOf) {
      AttributeString += ",color=\"blue\"";
      AttributeString += ",style=\"filled,bold,dashed\"";
    } else
      AttributeString += ",style=\"filled\"";
    return AttributeString;
  }

  static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
                                       GraphType G) {
    auto &Edge = *(ChildIter.getCurrent());
    // Highlight the edge if it carries the requested context id.
    bool Highlight = false;
    // ...
    auto Color = getColor(Edge->AllocTypes, Highlight);
    std::string AttributeString =
        (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
         // fillcolor is the arrow head and color is the line.
         Twine(",fillcolor=\"") + Color + "\"" + Twine(",color=\"") + Color +
         "\"")
            .str();
    if (Edge->IsBackedge)
      AttributeString += ",style=\"dotted\"";
    // Bold the edge if highlighted.
    if (Highlight)
      AttributeString += ",penwidth=\"2.0\",weight=\"2\"";
    return AttributeString;
  }

  // Nodes removed during cloning are not rendered; the -memprof-dot-scope
  // option can further restrict which nodes are exported.
  static bool isNodeHidden(NodeRef Node, GraphType G) {
    if (Node->isRemoved())
      return true;
    // ...
  }

private:
  static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
    std::string IdString = "ContextIds:";
    if (ContextIds.size() < 100) {
      std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
      std::sort(SortedIds.begin(), SortedIds.end());
      for (auto Id : SortedIds)
        IdString += (" " + Twine(Id)).str();
    } else {
      IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
    }
    return IdString;
  }

  static std::string getColor(uint8_t AllocTypes, bool Highlight) {
    // If highlighting is off, or this element is highlighted, use the
    // saturated colors; otherwise use muted variants so highlighted elements
    // stand out.
    if (AllocTypes == (uint8_t)AllocationType::NotCold)
      return !DoHighlight || Highlight ? "brown1" : "lightpink";
    if (AllocTypes == (uint8_t)AllocationType::Cold)
      return !DoHighlight || Highlight ? "cyan" : "lightskyblue";
    if (AllocTypes ==
        ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
      // Both cold and not cold contexts flow through this element.
      return Highlight ? "magenta" : "mediumorchid1";
    return "gray";
  }

  static std::string getNodeId(NodeRef Node) {
    std::stringstream SStream;
    SStream << std::hex << "N0x" << (unsigned long long)Node;
    std::string Result = SStream.str();
    return Result;
  }
};
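// Color legend for the exported dot graphs, per getColor above: red/pink
// tones mark NotCold contexts, cyan/blue tones mark Cold, and magenta/orchid
// tones mark elements carrying both. The muted variants are used for
// non-highlighted elements when id-based highlighting is active.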
// Out-of-line definition of the DOTGraphTraits highlight flag declared above;
// it is enabled when id-based highlighting is requested.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool DOTGraphTraits<
    const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>::DoHighlight =
    false;

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
    std::string Label) const {
  // ...
  WriteGraph(this, "", false, Label,
             DotFilePathPrefix + "ccg." + Label + ".dot");
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
    const std::shared_ptr<ContextEdge> &Edge,
    DenseSet<uint32_t> ContextIdsToMove) {
  ContextNode *Node = Edge->Callee;
  assert(NodeToCallingFunc.count(Node));
  ContextNode *Clone =
      createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);
  Node->addClone(Clone);
  Clone->MatchingCalls = Node->MatchingCalls;
  moveEdgeToExistingCalleeClone(Edge, Clone, /*NewClone=*/true,
                                ContextIdsToMove);
  return Clone;
}
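// moveEdgeToNewCalleeClone above is a thin wrapper: it creates the clone node
// and delegates all of the edge and context id bookkeeping to
// moveEdgeToExistingCalleeClone below with NewClone=true.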
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                  ContextNode *NewCallee, bool NewClone,
                                  DenseSet<uint32_t> ContextIdsToMove) {
  // NewCallee and Edge's current callee must be clones of the same original
  // node (Edge's current callee may be the original node too).
  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());

  bool EdgeIsRecursive = Edge->Callee == Edge->Caller;

  ContextNode *OldCallee = Edge->Callee;

  // We might already have an edge to the new callee from earlier cloning for a
  // different allocation. If one exists we will reuse it.
  auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);

  // Callers pass an empty ContextIdsToMove set when they want to move the
  // whole edge. Copy in Edge's ids for simplicity.
  if (ContextIdsToMove.empty())
    ContextIdsToMove = Edge->getContextIds();

  // If we are moving all of Edge's ids, then just move the whole Edge.
  // Otherwise only move the specified subset, to a new edge if needed.
  if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
    // First, update the alloc types on NewCallee from Edge.
    NewCallee->AllocTypes |= Edge->AllocTypes;
    if (ExistingEdgeToNewCallee) {
      // Since we already have an edge to NewCallee, simply move the ids onto
      // it, and remove the existing Edge.
      ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
      ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
      assert(Edge->ContextIds == ContextIdsToMove);
      removeEdgeFromGraph(Edge.get());
    } else {
      // Otherwise just reconnect Edge to NewCallee.
      Edge->Callee = NewCallee;
      NewCallee->CallerEdges.push_back(Edge);
      // Remove it from the callee where it was previously connected.
      OldCallee->eraseCallerEdge(Edge.get());
      // Edge's context ids are unchanged since we are simply reconnecting it.
    }
  } else {
    // Only moving a subset of Edge's ids.
    auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
    if (ExistingEdgeToNewCallee) {
      // Since we already have an edge to NewCallee, simply move the ids onto
      // it.
      ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
      ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
    } else {
      // Otherwise, create a new edge to NewCallee for the ids being moved.
      auto NewEdge = std::make_shared<ContextEdge>(
          NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
      Edge->Caller->CalleeEdges.push_back(NewEdge);
      NewCallee->CallerEdges.push_back(NewEdge);
    }
    // In either case, update the alloc types on NewCallee, remove the moved
    // ids from the original Edge, and recompute its alloc type.
    NewCallee->AllocTypes |= CallerEdgeAllocType;
    set_subtract(Edge->ContextIds, ContextIdsToMove);
    Edge->AllocTypes = computeAllocType(Edge->ContextIds);
  }
  // Now walk the old callee node's callee edges and move the moved context
  // ids over to the corresponding edges out of NewCallee (created here as
  // needed).
  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
    ContextNode *CalleeToUse = OldCalleeEdge->Callee;
    // If this is a direct recursion edge, use NewCallee (the clone) as the
    // callee as well, so that any edge updated/created here is also direct
    // recursive.
    if (CalleeToUse == OldCallee) {
      // A recursive edge was already handled above.
      if (EdgeIsRecursive)
        continue;
      CalleeToUse = NewCallee;
    }
    // The ids moving to the new callee are the intersection of this edge's
    // ids and the ids moved from the caller edge.
    DenseSet<uint32_t> EdgeContextIdsToMove =
        set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
    OldCalleeEdge->AllocTypes =
        computeAllocType(OldCalleeEdge->getContextIds());
    if (!NewClone) {
      // Update context ids / alloc type on a corresponding existing edge from
      // NewCallee, if one exists (it may not when reusing an existing clone
      // whose none type edges were previously removed).
      if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
        NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
        continue;
      }
    }
    auto NewEdge = std::make_shared<ContextEdge>(
        CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
        EdgeContextIdsToMove);
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
  // Recompute the node alloc type now that its callee edges were updated.
  OldCallee->AllocTypes = OldCallee->computeAllocType();
  // OldCallee's alloc type should be None iff its context id set is now empty.
  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCallee->emptyContextIds());
  if (VerifyNodes) {
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                              ContextNode *NewCaller) {
  auto *OldCallee = Edge->Callee;
  auto *NewCallee = OldCallee;
  // If this edge was direct recursive, make any new/updated edge also direct
  // recursive to NewCaller.
  bool Recursive = Edge->Caller == Edge->Callee;
  if (Recursive)
    NewCallee = NewCaller;

  ContextNode *OldCaller = Edge->Caller;
  OldCaller->eraseCalleeEdge(Edge.get());

  // We might already have an edge to the new caller. If one exists we will
  // reuse it.
  auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);

  if (ExistingEdgeToNewCaller) {
    // Since we already have an edge to NewCaller, simply move the ids onto it,
    // and remove the existing Edge.
    ExistingEdgeToNewCaller->getContextIds().insert_range(
        Edge->getContextIds());
    ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
    Edge->ContextIds.clear();
    Edge->AllocTypes = (uint8_t)AllocationType::None;
    OldCallee->eraseCallerEdge(Edge.get());
  } else {
    // Otherwise just reconnect Edge to NewCaller.
    Edge->Caller = NewCaller;
    NewCaller->CalleeEdges.push_back(Edge);
    if (Recursive) {
      assert(NewCallee == NewCaller);
      Edge->Callee = NewCallee;
      NewCallee->CallerEdges.push_back(Edge);
      OldCallee->eraseCallerEdge(Edge.get());
    }
    // Edge's context ids are unchanged since we are simply reconnecting it.
  }
  // In either case, update the alloc types on the new caller.
  NewCaller->AllocTypes |= Edge->AllocTypes;

  // A new node will not have any caller edges yet.
  bool IsNewNode = NewCaller->CallerEdges.empty();

  // Now walk the old caller node's caller edges and move the moved context
  // ids over to the corresponding edges into NewCaller (created here as
  // needed).
  for (auto &OldCallerEdge : OldCaller->CallerEdges) {
    auto OldCallerCaller = OldCallerEdge->Caller;
    // The ids moving to the new caller are the intersection of this edge's
    // ids and the ids on the callee edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove = set_intersection(
        OldCallerEdge->getContextIds(), Edge->getContextIds());
    if (OldCaller == OldCallerCaller) {
      // Keep direct recursion direct on the new caller.
      OldCallerCaller = NewCaller;
    }
    set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
    OldCallerEdge->AllocTypes =
        computeAllocType(OldCallerEdge->getContextIds());
    // If there is already an edge into NewCaller from this caller, move the
    // ids onto it; otherwise create a new edge.
    auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
    if (ExistingCallerEdge) {
      ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
      ExistingCallerEdge->AllocTypes |=
          computeAllocType(EdgeContextIdsToMove);
      continue;
    }
    auto NewEdge = std::make_shared<ContextEdge>(
        NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
        EdgeContextIdsToMove);
    NewCaller->CallerEdges.push_back(NewEdge);
    NewEdge->Caller->CalleeEdges.push_back(NewEdge);
  }
  (void)IsNewNode;
  // Recompute the node alloc type now that its caller edges were updated.
  OldCaller->AllocTypes = OldCaller->computeAllocType();
  // OldCaller's alloc type should be None iff its context id set is now empty.
  assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCaller->emptyContextIds());
  if (VerifyNodes) {
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller, /*CheckEdges=*/false);
    for (const auto &OldCallerEdge : OldCaller->CallerEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
                                            /*CheckEdges=*/false);
    for (const auto &NewCallerEdge : NewCaller->CallerEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
                                            /*CheckEdges=*/false);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    recursivelyRemoveNoneTypeCalleeEdges(
        ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;

  removeNoneTypeCalleeEdges(Node);

  for (auto *Clone : Node->Clones)
    recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);

  // The recursive call may remove some of this Node's caller edges.
  // Iterate over a copy and skip any that were removed.
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    // Skip any that have been removed by an earlier recursive call.
    if (Edge->isRemoved()) {
      assert(!is_contained(Node->CallerEdges, Edge));
      continue;
    }
    recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
  }
}
// Mark backedges via the standard DFS based backedge algorithm.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
  // If we are cloning recursive contexts, find and mark backedges from all
  // root callers.
  if (!CloneRecursiveContexts)
    return;
  DenseSet<const ContextNode *> Visited;
  DenseSet<const ContextNode *> CurrentStack;
  for (auto &Entry : NonAllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    if (Node->isRemoved())
      continue;
    // It is a root if it doesn't have callers.
    if (!Node->CallerEdges.empty())
      continue;
    markBackedges(Node, Visited, CurrentStack);
    assert(CurrentStack.empty());
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseSet<const ContextNode *> &CurrentStack) {
  auto I = Visited.insert(Node);
  // We should only call this for unvisited nodes.
  assert(I.second);
  (void)I;
  for (auto &CalleeEdge : Node->CalleeEdges) {
    auto *Callee = CalleeEdge->Callee;
    if (Visited.count(Callee)) {
      // Since this was already visited we need to check if it is currently on
      // the recursive stack, in which case it is a backedge.
      if (CurrentStack.count(Callee))
        CalleeEdge->IsBackedge = true;
      continue;
    }
    CurrentStack.insert(Callee);
    markBackedges(Callee, Visited, CurrentStack);
    CurrentStack.erase(Callee);
  }
}
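// Example: for a cycle A -> B -> C -> B, the DFS from root A visits B then C;
// when C's callee edge back to B is examined, B is still in CurrentStack, so
// that edge gets IsBackedge = true. Cloning through such edges is deferred
// (counted by the DeferredBackedges statistic).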
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    Visited.clear();
    identifyClones(Entry.second, Visited, Entry.second->getContextIds());
  }
  Visited.clear();
  for (auto &Entry : AllocationCallToContextNodeMap)
    recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
  if (VerifyCCG)
    check();
}
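// Cloning is driven per-allocation: each allocation node seeds a caller-graph
// walk restricted to its own context ids, with the Visited set cleared
// between allocations so a callsite can be reconsidered for every allocation
// whose contexts flow through it.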
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    const DenseSet<uint32_t> &AllocContextIds) {
  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
  assert(!Node->CloneOf);

  // If Node has a null call, then either it wasn't found in the module
  // (regular LTO) or summary index (ThinLTO), or there were other conditions
  // blocking cloning (e.g. recursion, calls multiple targets, etc). Return
  // here so we don't try to recursively clone callers below, which isn't
  // useful at least for this node.
  if (!Node->hasCall())
    return;

  // Keep this before the recursive calls, so that we don't hit an infinite
  // loop if the graph has a cycle.
  Visited.insert(Node);

  {
    // The recursive call to identifyClones may delete the current edge from
    // the CallerEdges vector. Make a copy and iterate on that.
    auto CallerEdges = Node->CallerEdges;
    for (auto &Edge : CallerEdges) {
      // Skip any that have been removed by an earlier recursive call.
      if (Edge->isRemoved()) {
        assert(!is_contained(Node->CallerEdges, Edge));
        continue;
      }
      // Defer backedges. See comments further below.
      if (Edge->IsBackedge) {
        // We should only mark these if cloning recursive contexts, where we
        // need to do this deferral.
        assert(CloneRecursiveContexts);
        continue;
      }
      // Ignore any caller we previously visited via another edge.
      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
        identifyClones(Edge->Caller, Visited, AllocContextIds);
      }
    }
  }

  // Check if we reached an unambiguous call or have only a single caller, in
  // which case no cloning of Node is needed.
  if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
    return;

  // We need to clone. Try to keep the original version as alloc type NotCold,
  // so callers that cannot be updated (e.g. indirect calls) get the default
  // behavior. Sort the CallerEdges by alloc type priority so Cold and mixed
  // edges are cloned off first.
  const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
                                               /*Cold*/ 1, /*NotColdCold*/ 2};
  llvm::stable_sort(Node->CallerEdges,
                    [&](const std::shared_ptr<ContextEdge> &A,
                        const std::shared_ptr<ContextEdge> &B) {
                      // Nodes with non-empty context ids should be sorted
                      // before those with empty context ids.
                      if (A->ContextIds.empty())
                        // Either B's ids are non-empty (B < A), or both are
                        // empty and we keep the original relative order.
                        return false;
                      if (B->ContextIds.empty())
                        return true;
                      if (A->AllocTypes == B->AllocTypes)
                        // Use the first context id for each edge as a
                        // tie-breaker.
                        return *A->ContextIds.begin() < *B->ContextIds.begin();
                      return AllocTypeCloningPriority[A->AllocTypes] <
                             AllocTypeCloningPriority[B->AllocTypes];
                    });

  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  DenseSet<uint32_t> RecursiveContextIds;
  // If we are allowing recursive callsites, but have disabled recursive
  // contexts, look for context ids that show up in multiple caller edges.
  if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
    DenseSet<uint32_t> AllCallerContextIds;
    for (auto &CE : Node->CallerEdges) {
      // Resize to the largest set of caller context ids, since we know the
      // final set will be at least that large.
      AllCallerContextIds.reserve(CE->getContextIds().size());
      for (auto Id : CE->getContextIds())
        if (!AllCallerContextIds.insert(Id).second)
          RecursiveContextIds.insert(Id);
    }
  }

  // Iterate over a copy of Node's caller edges, since the moveEdgeTo* methods
  // may remove edges, and this makes the handling less error-prone.
  auto CallerEdges = Node->CallerEdges;
  for (auto &CallerEdge : CallerEdges) {
    // Skip any that have been removed by an earlier recursive call.
    if (CallerEdge->isRemoved()) {
      assert(!is_contained(Node->CallerEdges, CallerEdge));
      continue;
    }
    assert(CallerEdge->Callee == Node);

    // If the caller was not successfully matched to a call in the IR/summary,
    // there is no point in trying to clone for it as we can't update that
    // call.
    if (!CallerEdge->Caller->hasCall())
      continue;

    // Only need to process the ids along this edge pertaining to the given
    // allocation.
    auto CallerEdgeContextsForAlloc =
        set_intersection(CallerEdge->getContextIds(), AllocContextIds);
    if (!RecursiveContextIds.empty())
      CallerEdgeContextsForAlloc =
          set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
    if (CallerEdgeContextsForAlloc.empty())
      continue;

    auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);

    // Compute the node callee edge alloc types corresponding to the context
    // ids for this caller edge.
    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
    for (auto &CalleeEdge : Node->CalleeEdges)
      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
          CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));

    // Don't clone if doing so will not disambiguate any alloc types amongst
    // caller edges (including the callee edges that would be cloned).
    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
    if (!CallerEdge->IsBackedge &&
        allocTypeToUse(CallerAllocTypeForAlloc) ==
            allocTypeToUse(Node->AllocTypes) &&
        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges))
      continue;

    if (CallerEdge->IsBackedge) {
      // We should only mark these if cloning recursive contexts, where we
      // need to do this deferral.
      assert(CloneRecursiveContexts);
      DeferredBackedges++;
    }

    // If this is a backedge, perform the deferred cloning of its caller now,
    // then check whether the recursion moved the relevant context ids onto a
    // new edge from a caller clone, and switch to that edge if so.
    if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
        !Visited.count(CallerEdge->Caller)) {
      const auto OrigIdCount = CallerEdge->getContextIds().size();
      identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
      removeNoneTypeCalleeEdges(CallerEdge->Caller);
      bool UpdatedEdge = false;
      if (OrigIdCount > CallerEdge->getContextIds().size()) {
        for (auto E : Node->CallerEdges) {
          // Only interested in clones of the current edge's caller.
          if (E->Caller->CloneOf != CallerEdge->Caller)
            continue;
          // See if this edge contains any of the context ids originally on
          // the current caller edge.
          auto CallerEdgeContextsForAllocNew =
              set_intersection(CallerEdgeContextsForAlloc, E->getContextIds());
          if (CallerEdgeContextsForAllocNew.empty())
            continue;
          // Switch to looking at the new edge for this allocation.
          CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
          CallerEdge = E;
          UpdatedEdge = true;
          break;
        }
      }
      // If cloning removed this edge (and we didn't update it to a new edge
      // above), we're done with this edge.
      if (CallerEdge->isRemoved())
        continue;

      // Update the information used for the cloning decisions below, as we
      // may have modified edges and their context ids.
      if (!UpdatedEdge) {
        CallerEdgeContextsForAlloc = set_intersection(
            CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
        if (CallerEdgeContextsForAlloc.empty())
          continue;
      }
      CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
      CalleeEdgeAllocTypesForCallerEdge.clear();
      for (auto &CalleeEdge : Node->CalleeEdges) {
        CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
            CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
      }
    }

    // First see if we can use an existing clone. Check each clone and its
    // callee edges for matching alloc types.
    ContextNode *Clone = nullptr;
    for (auto *CurClone : Node->Clones) {
      if (allocTypeToUse(CurClone->AllocTypes) !=
          allocTypeToUse(CallerAllocTypeForAlloc))
        continue;

      bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) &&
                             hasSingleAllocType(CallerAllocTypeForAlloc);
      // The above check should mean that if both have single alloc types they
      // are equal.
      assert(!BothSingleAlloc ||
             CurClone->AllocTypes == CallerAllocTypeForAlloc);

      // If either both have a single (equal) alloc type, or the clone's
      // callee edge alloc types match those computed for this caller edge, we
      // can move this edge onto CurClone.
      if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
                                 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
        Clone = CurClone;
        break;
      }
    }

    // Move the edge to a matching existing clone, or to a new one.
    if (Clone)
      moveEdgeToExistingCalleeClone(CallerEdge, Clone, /*NewClone=*/false,
                                    CallerEdgeContextsForAlloc);
    else
      Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);

    // Sanity check that no alloc types on the clone are None.
    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
  }

  // Sanity check that no alloc types on the node are None.
  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
}
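// The priority array used above processes Cold caller edges first (lowest
// priority value sorts first), then mixed NotCold+Cold edges, so cold
// contexts are cloned off onto new nodes while the original node tends to
// keep the NotCold behavior. That is the safe default for callers that
// cannot be rewritten, such as indirect calls.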
void ModuleCallsiteContextGraph::updateAllocationCall(
    CallInfo &Call, AllocationType AllocType) {
  std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
  auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
                                "memprof", AllocTypeString);
  cast<CallBase>(Call.call())->addFnAttr(A);
  OREGetter(Call.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
            << ore::NV("AllocationCall", Call.call()) << " in clone "
            << ore::NV("Caller", Call.call()->getFunction())
            << " marked with memprof allocation attribute "
            << ore::NV("Attribute", AllocTypeString));
}

void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
                                                     AllocationType AllocType) {
  auto *AI = cast<AllocInfo *>(Call.call().getBase());
  assert(AI);
  assert(AI->Versions.size() > Call.cloneNo());
  AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
}

AllocationType
ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
  const auto *CB = cast<CallBase>(Call.call());
  if (!CB->getAttributes().hasFnAttr("memprof"))
    return AllocationType::None;
  return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
             ? AllocationType::Cold
             : AllocationType::NotCold;
}

AllocationType
IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
  const auto *AI = cast<AllocInfo *>(Call.call().getBase());
  assert(AI->Versions.size() > Call.cloneNo());
  return (AllocationType)AI->Versions[Call.cloneNo()];
}
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                            FuncInfo CalleeFunc) {
  auto *CurF = getCalleeFunc(CallerCall.call());
  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
  if (isMemProfClone(*CurF)) {
    // If we already assigned this callsite to call a specific non-default
    // clone (i.e. not the original function which is clone 0), ensure that we
    // aren't trying to now update it to call a different clone, which is
    // indicative of a bug in the graph or function assignment.
    auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
    if (CurCalleeCloneNo != NewCalleeCloneNo) {
      LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
                        << CurCalleeCloneNo << " now " << NewCalleeCloneNo
                        << "\n");
      MismatchedCloneAssignments++;
    }
  }
  if (NewCalleeCloneNo > 0)
    cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
  OREGetter(CallerCall.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
            << ore::NV("Call", CallerCall.call()) << " in clone "
            << ore::NV("Caller", CallerCall.call()->getFunction())
            << " assigned to call function clone "
            << ore::NV("Callee", CalleeFunc.func()));
}

void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                           FuncInfo CalleeFunc) {
  auto *CI = cast<CallsiteInfo *>(CallerCall.call().getBase());
  assert(CI &&
         "Caller cannot be an allocation which should not have profiled calls");
  assert(CI->Clones.size() > CallerCall.cloneNo());
  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
  // If we already assigned this callsite to call a specific non-default
  // clone, ensure that we aren't trying to now update it to call a different
  // clone, which is indicative of a bug in the graph or function assignment.
  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
    LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
                      << CurCalleeCloneNo << " now " << NewCalleeCloneNo
                      << "\n");
    MismatchedCloneAssignments++;
  }
  CurCalleeCloneNo = NewCalleeCloneNo;
}
// Update any debug info subprogram linkage name to match the new clone name
// (the start of this function's body is elided in this listing).
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name) {
  // ... (early-exits if there is no subprogram; builds MDName from Name)
  SP->replaceLinkageName(MDName);
  DISubprogram *Decl = SP->getDeclaration();
  if (!Decl)
    return;
  TempDISubprogram NewDecl = Decl->clone();
  NewDecl->replaceLinkageName(MDName);
  SP->replaceDeclaration(MDNode::replaceWithUniqued(std::move(NewDecl)));
}
CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                     Instruction *>::FuncInfo
ModuleCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // Use existing LLVM facilities for cloning and obtaining Call in the clone.
  ValueToValueMapTy VMap;
  auto *NewFunc = CloneFunction(Func.func(), VMap);
  std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
  assert(!Func.func()->getParent()->getFunction(Name));
  NewFunc->setName(Name);
  updateSubprogramLinkageName(NewFunc, Name);
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
  }
  OREGetter(Func.func())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
            << "created clone " << ore::NV("NewFunction", NewFunc));
  return {NewFunc, CloneNo};
}
CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                     IndexCall>::FuncInfo
IndexCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // In the index-based graph there is no IR to clone; just extend the
  // per-callsite clone arrays in the summary.
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    if (auto *AI = dyn_cast_if_present<AllocInfo *>(Inst.call().getBase())) {
      assert(AI->Versions.size() == CloneNo);
      // We assign the allocation type later (in updateAllocationCall), just
      // add an entry for it here.
      AI->Versions.push_back(0);
    } else {
      auto *CI = cast<CallsiteInfo *>(Inst.call().getBase());
      assert(CI && CI->Clones.size() == CloneNo);
      // We assign the clone number later (in updateCall), just add an entry
      // for it here.
      CI->Clones.push_back(0);
    }
    CallMap[Inst] = {Inst.call(), CloneNo};
  }
  return {Func.func(), CloneNo};
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
  if (!MergeClones)
    return;

  // Generate a map from context id to the associated allocation node for use
  // when merging clones.
  DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    for (auto Id : Node->getContextIds())
      ContextIdToAllocationNode[Id] = Node->getOrigNode();
    for (auto *Clone : Node->Clones) {
      for (auto Id : Clone->getContextIds())
        ContextIdToAllocationNode[Id] = Clone->getOrigNode();
    }
  }

  // Post order traversal starting from allocations, invoking the merge
  // handling for each callsite node.
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    mergeClones(Node, Visited, ContextIdToAllocationNode);
    // Make a copy so the recursive traversal, which may create new clones,
    // doesn't invalidate iteration.
    auto Clones = Node->Clones;
    for (auto *Clone : Clones)
      mergeClones(Clone, Visited, ContextIdToAllocationNode);
  }

  if (DumpCCG) {
    dbgs() << "CCG after merging:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("aftermerge");
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;

  // Iteratively perform the post order traversal of caller edges until no new
  // unvisited callers are found, since merging may add new callers.
  bool FoundUnvisited = true;
  unsigned Iters = 0;
  while (FoundUnvisited) {
    Iters++;
    FoundUnvisited = false;
    // Make a copy since the recursive call may move a caller edge to a new
    // callee, invalidating the iterator.
    auto CallerEdges = Node->CallerEdges;
    for (auto CallerEdge : CallerEdges) {
      // Skip any caller edge moved onto a different callee during recursion.
      if (CallerEdge->Callee != Node)
        continue;
      // If we found an unvisited caller, check all caller edges again after
      // this loop, unless iterative merging is disabled.
      if (DoMergeIteration && !Visited.contains(CallerEdge->Caller))
        FoundUnvisited = true;
      mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
    }
  }
  TotalMergeInvokes++;
  TotalMergeIters += Iters;
  if (Iters > MaxMergeIters)
    MaxMergeIters = Iters;

  // Merge for this node after we handle its callers.
  mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
  // Ignore Node if we moved all of its contexts to clones.
  if (Node->emptyContextIds())
    return;

  // First identify groups of clones among Node's callee edges, by building a
  // map from each callee's original (base) node to the edges from Node that
  // reach its clones.
  MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
      OrigNodeToCloneEdges;
  for (const auto &E : Node->CalleeEdges) {
    auto *Callee = E->Callee;
    if (!Callee->CloneOf && Callee->Clones.empty())
      continue;
    ContextNode *Base = Callee->getOrigNode();
    OrigNodeToCloneEdges[Base].push_back(E);
  }

  // Helper for sorting the callee edges: ascending by the callee's caller
  // edge count, preferring non-clones, with the lowest context id breaking
  // remaining ties.
  auto CalleeCallerEdgeLessThan = [](const std::shared_ptr<ContextEdge> &A,
                                     const std::shared_ptr<ContextEdge> &B) {
    if (A->Callee->CallerEdges.size() != B->Callee->CallerEdges.size())
      return A->Callee->CallerEdges.size() < B->Callee->CallerEdges.size();
    if (A->Callee->CloneOf && !B->Callee->CloneOf)
      return false;
    else if (!A->Callee->CloneOf && B->Callee->CloneOf)
      return true;
    // Use the first context id for each edge as a tie-breaker.
    return *A->ContextIds.begin() < *B->ContextIds.begin();
  };

  // Process each set of callee clones called by Node, performing the needed
  // merging.
  for (auto Entry : OrigNodeToCloneEdges) {
    // CalleeEdges is the set of edges from Node reaching callees that are
    // mutual clones of each other.
    auto &CalleeEdges = Entry.second;
    auto NumCalleeClones = CalleeEdges.size();
    // A single edge means there is no merging needed.
    if (NumCalleeClones == 1)
      continue;
    llvm::sort(CalleeEdges, CalleeCallerEdgeLessThan);

    // Look for other caller nodes with edges to the same set of callee
    // clones, so they can share the resulting merge node.
    DenseSet<ContextNode *> OtherCallersToShareMerge;
    findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
                                 OtherCallersToShareMerge);

    // Now do the actual merging. Identify an existing merge node or create a
    // new one on the first iteration, then move each callee edge over, along
    // with edges from the other callers identified above.
    ContextNode *MergeNode = nullptr;
    DenseMap<ContextNode *, unsigned> CallerToMoveCount;
    for (auto CalleeEdge : CalleeEdges) {
      auto *OrigCallee = CalleeEdge->Callee;
      // If we don't have a MergeNode yet (only possible on the first
      // iteration), see if we can use this callee directly.
      if (!MergeNode) {
        // If this callee has a single caller, simply use it as the merge node.
        if (CalleeEdge->Callee->CallerEdges.size() == 1) {
          MergeNode = OrigCallee;
          NonNewMergedNodes++;
          continue;
        }
        // Otherwise, if all of OrigCallee's other callers are among the
        // callers that will share the merge, we can still use this callee.
        if (!OtherCallersToShareMerge.empty()) {
          bool MoveAllCallerEdges = true;
          for (auto CalleeCallerE : OrigCallee->CallerEdges) {
            if (CalleeCallerE == CalleeEdge)
              continue;
            if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) {
              MoveAllCallerEdges = false;
              break;
            }
          }
          if (MoveAllCallerEdges) {
            MergeNode = OrigCallee;
            NonNewMergedNodes++;
            continue;
          }
        }
      }
      // Move this callee edge, creating a new merge node if necessary.
      if (MergeNode) {
        assert(MergeNode != OrigCallee);
        moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
                                      /*NewClone=*/false);
      } else {
        MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
        NewMergedNodes++;
      }
      // Now move the identified edges from other callers over as well.
      if (!OtherCallersToShareMerge.empty()) {
        // Iterate over a copy of OrigCallee's caller edges because some will
        // be moved off of OrigCallee during the loop.
        auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
        for (auto &CalleeCallerE : OrigCalleeCallerEdges) {
          if (CalleeCallerE == CalleeEdge)
            continue;
          if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller))
            continue;
          CallerToMoveCount[CalleeCallerE->Caller]++;
          moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
                                        /*NewClone=*/false);
        }
      }
      removeNoneTypeCalleeEdges(OrigCallee);
      removeNoneTypeCalleeEdges(MergeNode);
    }
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    findOtherCallersToShareMerge(
        ContextNode *Node,
        std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
        DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
        DenseSet<ContextNode *> &OtherCallersToShareMerge) {
  auto NumCalleeClones = CalleeEdges.size();
  // Counts how many edges to the same set of callee clones exist for each
  // other caller node.
  DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
  // Counts the other caller nodes that have edges to all callee clones.
  unsigned PossibleOtherCallerNodes = 0;

  // We only need to look at other caller nodes if the first callee edge has
  // multiple callers (recall the edges are sorted in ascending caller edge
  // count order).
  if (CalleeEdges[0]->Callee->CallerEdges.size() < 2)
    return;

  // For each callee edge, count the other callers of the same callee clone,
  // and collect the allocation nodes reached by its context ids.
  DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
  for (auto CalleeEdge : CalleeEdges) {
    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
    for (auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
      if (CalleeCallerEdges->Caller == Node) {
        assert(CalleeCallerEdges == CalleeEdge);
        continue;
      }
      OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
      // If this caller now reaches all of the callee clones, it becomes a
      // candidate.
      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
          NumCalleeClones)
        PossibleOtherCallerNodes++;
    }
    for (auto Id : CalleeEdge->getContextIds()) {
      auto *Alloc = ContextIdToAllocationNode.lookup(Id);
      if (!Alloc) {
        // FIXME: unclonable contexts have no allocation node entry; skip them
        // for now.
        MissingAllocForContextId++;
        continue;
      }
      CalleeEdgeToAllocNodes[CalleeEdge.get()].insert(Alloc);
    }
  }

  // Walk the callee edges again and confirm that each candidate caller's
  // contexts reach only the same allocations as the contexts on Node's own
  // callee edges; otherwise drop the candidate.
  for (auto CalleeEdge : CalleeEdges) {
    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
    // Stop if we have no (more) candidate other caller nodes.
    if (!PossibleOtherCallerNodes)
      break;
    auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
    for (auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
      // Not interested in the edge from Node itself.
      if (CalleeCallerE == CalleeEdge)
        continue;
      // Skip callers that didn't have edges to all callee clones.
      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
          NumCalleeClones)
        continue;
      for (auto Id : CalleeCallerE->getContextIds()) {
        auto *Alloc = ContextIdToAllocationNode.lookup(Id);
        if (!Alloc)
          continue;
        // If the caller reaches a different allocation, reset its count to 0
        // so it is skipped from here on.
        if (!CurCalleeAllocNodes.contains(Alloc)) {
          OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
          PossibleOtherCallerNodes--;
          break;
        }
      }
    }
  }

  if (!PossibleOtherCallerNodes)
    return;

  // Build the set of other caller nodes that can use the merge node.
  for (auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) {
    if (Count != NumCalleeClones)
      continue;
    OtherCallersToShareMerge.insert(OtherCaller);
  }
}
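// A caller qualifies to share the merge node only if it has edges to every
// one of the callee clones being merged and its contexts reach the same
// allocations as those on Node's callee edges. Without that check, merging
// could recombine contexts that cloning deliberately disambiguated.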
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
  bool Changed = false;

  // Keep track of the assignment of nodes (callsites) to function clones they
  // call.
  DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;

  // Record that caller Caller now calls function version CalleeFunc.
  auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
                                        const FuncInfo &CalleeFunc) {
    assert(Caller->hasCall());
    CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
  };

  // Information for a single clone of the current function.
  struct FuncCloneInfo {
    // The function clone.
    FuncInfo FuncClone;
    // Remappings of each call of interest (from original uncloned call to the
    // corresponding cloned call in this function clone).
    DenseMap<CallInfo, CallInfo> CallMap;
  };

  // Walk all functions for which we saw calls with memprof metadata, and
  // handle the cloning for each of their calls.
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    FuncInfo OrigFunc(Func);
    // Map from each clone number of OrigFunc to the information about that
    // function clone.
    std::vector<FuncCloneInfo> FuncCloneInfos;
    for (auto &Call : CallsWithMetadata) {
      ContextNode *Node = getNodeForInst(Call);
      // Skip this call if there is no node for it or no clones were created.
      if (!Node || Node->Clones.empty())
        continue;
      assert(Node->hasCall() &&
             "Not having a call should have prevented cloning");

      // Track the assignment of function clones to clones of the current
      // callsite Node.
      std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;

      // Assign callsite clone CallsiteClone to function clone FuncClone, and
      // also assign the (possibly cloned) Call to CallsiteClone.
      auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
                                                CallInfo &Call,
                                                ContextNode *CallsiteClone,
                                                bool IsAlloc) {
        // Record the callsite clone assigned to this function clone.
        FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;

        assert(FuncCloneInfos.size() > FuncClone.cloneNo());
        DenseMap<CallInfo, CallInfo> &CallMap =
            FuncCloneInfos[FuncClone.cloneNo()].CallMap;
        CallInfo CallClone(Call);
        if (auto It = CallMap.find(Call); It != CallMap.end())
          CallClone = It->second;
        CallsiteClone->setCall(CallClone);
        // Need to do the same for all matching calls.
        for (auto &MatchingCall : Node->MatchingCalls) {
          CallInfo CallClone(MatchingCall);
          if (auto It = CallMap.find(MatchingCall); It != CallMap.end())
            CallClone = It->second;
          // Updates the call in the list.
          MatchingCall = CallClone;
        }
      };

      // Worklist of clones of this callsite Node that still need a function
      // clone; may grow if additional node cloning is required below.
      std::deque<ContextNode *> ClonesWorklist;
      // Ignore the original Node if we moved all of its contexts to clones.
      if (!Node->emptyContextIds())
        ClonesWorklist.push_back(Node);
      llvm::append_range(ClonesWorklist, Node->Clones);

      // Walk through all needed clones of this callsite Node and determine
      // the assignment to a corresponding function clone, creating new
      // function clones as needed.
      unsigned NodeCloneCount = 0;
      while (!ClonesWorklist.empty()) {
        ContextNode *Clone = ClonesWorklist.front();
        ClonesWorklist.pop_front();
        NodeCloneCount++;
        if (VerifyNodes)
          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);

        // Need to create a new function clone if we have more callsite clones
        // than function clones (assignment is greedy).
        if (FuncCloneInfos.size() < NodeCloneCount) {
          // If this is the first callsite copy, assign to original function.
          if (NodeCloneCount == 1) {
            // No callers should have been assigned a function clone for this
            // callee node yet.
            assert(llvm::none_of(
                Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
                  return CallsiteToCalleeFuncCloneMap.count(E->Caller);
                }));
            FuncCloneInfos.push_back(
                {OrigFunc, DenseMap<CallInfo, CallInfo>()});
            AssignCallsiteCloneToFuncClone(
                OrigFunc, Call, Clone,
                AllocationCallToContextNodeMap.count(Call));
            for (auto &CE : Clone->CallerEdges) {
              // Ignore any caller that does not have a recorded callsite Call.
              if (!CE->Caller->hasCall())
                continue;
              RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
            }
            continue;
          }

          // If a caller of this callsite clone was already assigned to call a
          // particular function clone, those callers must be redirected to
          // the new function clone created below, and their other callees in
          // this function updated as well.
          FuncInfo PreviousAssignedFuncClone;
          auto EI = llvm::find_if(
              Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
                return CallsiteToCalleeFuncCloneMap.count(E->Caller);
              });
          bool CallerAssignedToCloneOfFunc = false;
          if (EI != Clone->CallerEdges.end()) {
            const std::shared_ptr<ContextEdge> &Edge = *EI;
            PreviousAssignedFuncClone =
                CallsiteToCalleeFuncCloneMap[Edge->Caller];
            CallerAssignedToCloneOfFunc = true;
          }

          // Clone the function and save it along with the CallInfo map created
          // during cloning.
          DenseMap<CallInfo, CallInfo> NewCallMap;
          unsigned CloneNo = FuncCloneInfos.size();
          assert(CloneNo > 0 && "Clone 0 is the original function, which "
                                "should already exist in the map");
          FuncInfo NewFuncClone = cloneFunctionForCallsite(
              OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
          FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
          FunctionClonesAnalysis++;
          Changed = true;

          // If no caller callsites were already assigned to a clone of this
          // function, simply assign this clone to the new function clone and
          // update all callers.
          if (!CallerAssignedToCloneOfFunc) {
            AssignCallsiteCloneToFuncClone(
                NewFuncClone, Call, Clone,
                AllocationCallToContextNodeMap.count(Call));
            for (auto &CE : Clone->CallerEdges) {
              // Ignore any caller that does not have a recorded callsite Call.
              if (!CE->Caller->hasCall())
                continue;
              RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
            }
            continue;
          }

          // Otherwise, redirect the callers previously assigned to
          // PreviousAssignedFuncClone to the new function clone, cloning
          // their other callees as needed. Iterate over a copy since edges
          // may be removed.
          auto CallerEdges = Clone->CallerEdges;
          for (auto CE : CallerEdges) {
            // Skip any that have been removed on an earlier iteration.
            if (CE->isRemoved()) {
              assert(!is_contained(Clone->CallerEdges, CE));
              continue;
            }
            // Ignore any caller that does not have a recorded callsite Call.
            if (!CE->Caller->hasCall())
              continue;

            if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
                // Subsequent handling performs any additional cloning
                // required for callers assigned to other function clones.
                CallsiteToCalleeFuncCloneMap[CE->Caller] !=
                    PreviousAssignedFuncClone)
              continue;

            RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);

            // Clone the other callees of this caller that were assigned to
            // PreviousAssignedFuncClone, since those callsites effectively
            // now live in the new function clone. Iterate over a copy since
            // edges may be removed.
            auto CalleeEdges = CE->Caller->CalleeEdges;
            for (auto CalleeEdge : CalleeEdges) {
              // Skip any removed when cleaning up none type callee edges.
              if (CalleeEdge->isRemoved()) {
                assert(!is_contained(CE->Caller->CalleeEdges, CalleeEdge));
                continue;
              }
              ContextNode *Callee = CalleeEdge->Callee;
              // Skip the current callsite (we are looking for the caller's
              // other callees) and any callee without a recorded call.
              if (Callee == Clone || !Callee->hasCall())
                continue;
              // Skip direct recursive calls; we don't want to clone the
              // caller node again here.
              if (Callee == CalleeEdge->Caller)
                continue;
              ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
              removeNoneTypeCalleeEdges(NewClone);
              // Moving the edge may have left none type callee edges on the
              // original Callee.
              removeNoneTypeCalleeEdges(Callee);
              assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
              // If the Callee node was already assigned to call a specific
              // function version, make sure its new clone is assigned to call
              // that same function clone.
              if (CallsiteToCalleeFuncCloneMap.count(Callee))
                RecordCalleeFuncOfCallsite(
                    NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
              // Update NewClone with the new Call clone of this callsite's
              // Call created for the new function clone. The CallMap is
              // indexed by the original Call at clone 0.
              CallInfo OrigCall(Callee->getOrigNode()->Call);
              OrigCall.setCloneNo(0);
              DenseMap<CallInfo, CallInfo> &CallMap =
                  FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
              assert(CallMap.count(OrigCall));
              CallInfo NewCall(CallMap[OrigCall]);
              assert(NewCall);
              NewClone->setCall(NewCall);
              // Need to do the same for all matching calls.
              for (auto &MatchingCall : NewClone->MatchingCalls) {
                CallInfo OrigMatchingCall(MatchingCall);
                OrigMatchingCall.setCloneNo(0);
                assert(CallMap.count(OrigMatchingCall));
                CallInfo NewCall(CallMap[OrigMatchingCall]);
                assert(NewCall);
                // Updates the call in the list.
                MatchingCall = NewCall;
              }
            }
          }
          // Fall through to the handling below to record the function for
          // this callsite clone.
        }

        // Find the first function clone in FuncCloneInfos that has no
        // assigned clone of this callsite Node yet. One should always be
        // available at this point due to the cloning above.
        auto FindFirstAvailFuncClone = [&]() {
          for (auto &CF : FuncCloneInfos) {
            if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
              return CF.FuncClone;
          }
          llvm_unreachable(
              "Expected an available func clone for this callsite clone");
        };

        // Walk through this callsite clone's caller edges, looking for
        // callers already assigned to a function clone, and handle the
        // conflicts by cloning further when needed.
        std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
        FuncInfo FuncCloneAssignedToCurCallsiteClone;
        // Iterate over a copy since caller edges may be moved/removed.
        auto CloneCallerEdges = Clone->CallerEdges;
        for (auto &Edge : CloneCallerEdges) {
          // Skip removed edges.
          if (Edge->isRemoved())
            continue;
          // Ignore any caller that does not have a recorded callsite Call.
          if (!Edge->Caller->hasCall())
            continue;
          // If this caller was already assigned to call a specific function
          // clone, this callsite clone must be assigned to that same clone.
          if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
            FuncInfo FuncCloneCalledByCaller =
                CallsiteToCalleeFuncCloneMap[Edge->Caller];
            // The function clone may be unavailable: either it already has a
            // different clone of this callsite assigned, or this callsite
            // clone is already assigned to a different function clone.
            if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
                 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
                     Clone) ||
                (FuncCloneAssignedToCurCallsiteClone &&
                 FuncCloneAssignedToCurCallsiteClone !=
                     FuncCloneCalledByCaller)) {
              // Move the caller edge to the callsite clone already associated
              // with this function clone, creating one if needed.
              if (FuncCloneToNewCallsiteCloneMap.count(
                      FuncCloneCalledByCaller)) {
                ContextNode *NewClone =
                    FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
                moveEdgeToExistingCalleeClone(Edge, NewClone);
                // Clean up any none type edges cloned over.
                removeNoneTypeCalleeEdges(NewClone);
              } else {
                // Create a new callsite clone.
                ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
                removeNoneTypeCalleeEdges(NewClone);
                FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
                    NewClone;
                // Add to the worklist to process later.
                ClonesWorklist.push_back(NewClone);
                assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
              }
              // Moving the caller edge may have left none type callee edges.
              removeNoneTypeCalleeEdges(Clone);
              continue;
            }

            // Otherwise, use the function clone already called by this
            // caller.
            if (!FuncCloneAssignedToCurCallsiteClone) {
              FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
              // Assign Clone to FuncCloneCalledByCaller.
              AssignCallsiteCloneToFuncClone(
                  FuncCloneCalledByCaller, Call, Clone,
                  AllocationCallToContextNodeMap.count(Call));
            } else
              // Nothing to do; the callsite is already calling this function
              // clone.
              assert(FuncCloneAssignedToCurCallsiteClone ==
                     FuncCloneCalledByCaller);

          } else {
            // This caller has not yet been assigned a function clone.
            if (!FuncCloneAssignedToCurCallsiteClone) {
              FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
              assert(FuncCloneAssignedToCurCallsiteClone);
              // Assign Clone to the first available function clone.
              AssignCallsiteCloneToFuncClone(
                  FuncCloneAssignedToCurCallsiteClone, Call, Clone,
                  AllocationCallToContextNodeMap.count(Call));
            } else
              assert(FuncCloneToCurNodeCloneMap
                         [FuncCloneAssignedToCurCallsiteClone] == Clone);
            // Update the caller to record the function version it calls.
            RecordCalleeFuncOfCallsite(Edge->Caller,
                                       FuncCloneAssignedToCurCallsiteClone);
          }
        }
        // If we didn't assign a function clone to this callsite clone yet,
        // e.g. none of its callers has a recorded call, do the assignment
        // here.
        if (!FuncCloneAssignedToCurCallsiteClone) {
          FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
          assert(FuncCloneAssignedToCurCallsiteClone &&
                 "No available func clone for this callsite clone");
          AssignCallsiteCloneToFuncClone(
              FuncCloneAssignedToCurCallsiteClone, Call, Clone,
              AllocationCallToContextNodeMap.contains(Call));
        }
      }
      if (VerifyNodes) {
        checkNode<DerivedCCG, FuncTy, CallTy>(Node);
        for (const auto &PE : Node->CalleeEdges)
          checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
        for (const auto &CE : Node->CallerEdges)
          checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
        for (auto *Clone : Node->Clones) {
          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
          for (const auto &PE : Clone->CalleeEdges)
            checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
          for (const auto &CE : Clone->CallerEdges)
            checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
        }
      }
    }
  }

  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

  auto UpdateCalls = [&](ContextNode *Node,
                         DenseSet<const ContextNode *> &Visited,
                         auto &&UpdateCalls) {
    auto Inserted = Visited.insert(Node);
    if (!Inserted.second)
      return;

    for (auto *Clone : Node->Clones)
      UpdateCalls(Clone, Visited, UpdateCalls);

    for (auto &Edge : Node->CallerEdges)
      UpdateCalls(Edge->Caller, Visited, UpdateCalls);

    // Skip if there is no call to update, or no context ids remain (we moved
    // all edges onto other clones).
    if (!Node->hasCall() || Node->emptyContextIds())
      return;

    if (Node->IsAllocation) {
      auto AT = allocTypeToUse(Node->AllocTypes);
      // If the allocation type is ambiguous and more aggressive hinting is
      // enabled via MinClonedColdBytePercent, hint the allocation cold anyway
      // when its fraction of cold bytes meets the threshold.
      if (Node->AllocTypes == BothTypes && MinClonedColdBytePercent < 100 &&
          !ContextIdToContextSizeInfos.empty()) {
        uint64_t TotalCold = 0;
        uint64_t Total = 0;
        for (auto Id : Node->getContextIds()) {
          auto TypeI = ContextIdToAllocationType.find(Id);
          assert(TypeI != ContextIdToAllocationType.end());
          auto CSI = ContextIdToContextSizeInfos.find(Id);
          if (CSI != ContextIdToContextSizeInfos.end()) {
            for (auto &Info : CSI->second) {
              Total += Info.TotalSize;
              if (TypeI->second == AllocationType::Cold)
                TotalCold += Info.TotalSize;
            }
          }
        }
        if (TotalCold * 100 >= Total * MinClonedColdBytePercent)
          AT = AllocationType::Cold;
      }
      updateAllocationCall(Node->Call, AT);
      assert(Node->MatchingCalls.empty());
      return;
    }

    if (!CallsiteToCalleeFuncCloneMap.count(Node))
      return;

    auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
    updateCall(Node->Call, CalleeFunc);
    // Update all the matching calls as well.
    for (auto &Call : Node->MatchingCalls)
      updateCall(Call, CalleeFunc);
  };

  // Perform a DFS from each allocation node to update the calls per the
  // cloning decisions: regular LTO rewrites the IR calls and adds allocation
  // attributes, while ThinLTO records the decisions in the summary.
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap)
    UpdateCalls(Entry.second, Visited, UpdateCalls);

  return Changed;
}
// Create NumClones - 1 clones of function F for the ThinLTO backend (the
// first "clone" is the original copy), returning the value map for each new
// clone. The start of this function's body is elided in this listing.
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
    Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
    std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
        &FuncToAliasMap) {
  SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
  // ...
  FunctionsClonedThinBackend++;
  for (unsigned I = 1; I < NumClones; I++) {
    VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
    auto *NewF = CloneFunction(&F, *VMaps.back());
    FunctionClonesThinBackend++;
    // Strip memprof and callsite metadata from the clone as it is no longer
    // needed.
    for (auto &BB : *NewF) {
      for (auto &Inst : BB) {
        Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
        Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
      }
    }
    std::string Name = getMemProfFuncName(F.getName(), I);
    auto *PrevF = M.getFunction(Name);
    if (PrevF) {
      // We might have created this when adjusting a callsite in another
      // function. It should be a declaration.
      assert(PrevF->isDeclaration());
      NewF->takeName(PrevF);
      PrevF->replaceAllUsesWith(NewF);
      PrevF->eraseFromParent();
    } else
      NewF->setName(Name);
    updateSubprogramLinkageName(NewF, Name);
    ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
             << "created clone " << ore::NV("NewFunction", NewF));

    // Now handle aliases to this function, and clone those as well.
    if (!FuncToAliasMap.count(&F))
      continue;
    for (auto *A : FuncToAliasMap[&F]) {
      std::string Name = getMemProfFuncName(A->getName(), I);
      auto *PrevA = M.getNamedAlias(Name);
      auto *NewA = GlobalAlias::create(A->getValueType(),
                                       A->getType()->getPointerAddressSpace(),
                                       A->getLinkage(), Name, NewF);
      NewA->copyAttributesFrom(A);
      if (PrevA) {
        // We might have created this when adjusting a callsite in another
        // function. It should be a declaration.
        assert(PrevA->isDeclaration());
        NewA->takeName(PrevA);
        PrevA->replaceAllUsesWith(NewA);
        PrevA->eraseFromParent();
      }
    }
  }
  return VMaps;
}
// Locate the summary ValueInfo for function F, accounting for name changes
// from promotion/internalization. Much of this body is elided in this
// listing.
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
                                      const ModuleSummaryIndex *ImportSummary,
                                      const Function *CallingFunc = nullptr) {
  ValueInfo TheFnVI;
  // ... (derive the original (pre-promotion) name for the lookup)
  auto SrcFileMD = F.getMetadata("thinlto_src_file");
  // If F is a declaration it was imported without a definition and has no
  // thinlto_src_file metadata; fall back to the calling function's, since by
  // construction the caller is from the same original module.
  if (!SrcFileMD && F.isDeclaration()) {
    // ...
    SrcFileMD = CallingFunc->getMetadata("thinlto_src_file");
    // ...
  }
  StringRef SrcFile = M.getSourceFileName();
  // ... (recompute the global identifier / GUID and look up TheFnVI)
  // If not found and this looks like a promoted local still carrying its
  // original name, retry with the suffix stripped.
  if (!TheFnVI && OrigName == F.getName() && F.hasLocalLinkage() &&
      F.getName().contains('.')) {
    OrigName = F.getName().rsplit('.').first;
    // ...
  }
  // We should have found the summary unless this is a pure declaration.
  assert(TheFnVI || F.isDeclaration());
  return TheFnVI;
}
bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
    Module &M) {
  ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
  Symtab = std::make_unique<InstrProfSymtab>();
  // ... (the symtab is created from the module rather than the profile)
  if (Error E = Symtab->create(M, /*InLTO=*/true, /*AddCanonical=*/false)) {
    std::string SymtabFailure = toString(std::move(E));
    M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
    return false;
  }
  return true;
}
// Verify that the MIB stack ids in the summary AllocNode line up with the
// memprof metadata on the instruction. The start of this function's body is
// elided in this listing.
static void checkAllocContextIds(
    const AllocInfo &AllocNode, const MDNode *MemProfMD,
    const CallStack<MDNode, MDNode::op_iterator> &CallsiteContext,
    const ModuleSummaryIndex *ImportSummary) {
  auto MIBIter = AllocNode.MIBs.begin();
  for (auto &MDOp : MemProfMD->operands()) {
    // ... (obtains the MIB's stack metadata node as StackContext)
    auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
    auto ContextIterBegin =
        StackContext.beginAfterSharedPrefix(CallsiteContext);
    // Skip the duplicate-frame check on the first iteration.
    uint64_t LastStackContextId =
        (ContextIterBegin != StackContext.end() && *ContextIterBegin == 0) ? 1
                                                                           : 0;
    for (auto ContextIter = ContextIterBegin;
         ContextIter != StackContext.end(); ++ContextIter) {
      // If this is a direct recursion, simply skip the duplicate entries, to
      // be consistent with how the summary ids were generated during
      // ModuleSummaryAnalysis.
      if (LastStackContextId == *ContextIter)
        continue;
      LastStackContextId = *ContextIter;
      assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
      assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
             *ContextIter);
      StackIdIndexIter++;
    }
    MIBIter++;
  }
}
bool MemProfContextDisambiguation::applyImport(Module &M) {
  assert(ImportSummary);
  bool Changed = false;

  // We also need to clone any aliases that reference cloned functions,
  // because the modified callsites may invoke via the alias. Track the
  // aliases for each function.
  std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
      FuncToAliasMap;
  for (auto &A : M.aliases()) {
    auto *Aliasee = A.getAliaseeObject();
    if (auto *F = dyn_cast<Function>(Aliasee))
      FuncToAliasMap[F].insert(&A);
  }

  if (!initializeIndirectCallPromotionInfo(M))
    return false;

  for (auto &F : M) {
    if (F.isDeclaration() || isMemProfClone(F))
      continue;

    OptimizationRemarkEmitter ORE(&F);

    SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
    bool ClonesCreated = false;
    unsigned NumClonesCreated = 0;
    auto CloneFuncIfNeeded = [&](unsigned NumClones) {
      // We should at least have version 0, the original copy.
      assert(NumClones > 0);
      // If only one copy is needed, use the original.
      if (NumClones == 1)
        return;
      // If cloning was already performed for this function, confirm that the
      // requested number of clones matches (the thin link keeps the clone
      // count consistent across callsites within a function).
      if (ClonesCreated) {
        assert(NumClonesCreated == NumClones);
        return;
      }
      VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
      // The first "clone" is the original copy, which has no VMap.
      assert(VMaps.size() == NumClones - 1);
      Changed = true;
      ClonesCreated = true;
      NumClonesCreated = NumClones;
    };

    auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
                             Function *CalledFunction) {
      // Perform cloning if not yet done.
      CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());

      assert(!isMemProfClone(*CalledFunction));

      // Because the cloned calls are updated via setCalledOperand (see
      // below), out of an abundance of caution make sure the called function
      // really was the called operand (or its aliasee).
      auto *GA = dyn_cast_or_null<GlobalAlias>(CB->getCalledOperand());
      if (CalledFunction != CB->getCalledOperand() &&
          (!GA || CalledFunction != GA->getAliaseeObject())) {
        SkippedCallsCloning++;
        return;
      }
      // Update the calls per the summary info. Save the original name since
      // it gets updated in the first iteration below.
      auto CalleeOrigName = CalledFunction->getName();
      for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
        // Do nothing if this version calls the original version of its
        // callee.
        if (!StackNode.Clones[J])
          continue;
        auto NewF = M.getOrInsertFunction(
            getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
            CalledFunction->getFunctionType());
        CallBase *CBClone;
        // Copy 0 is the original function.
        if (!J)
          CBClone = CB;
        else
          CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
        // Set the called operand directly rather than via setCalledFunction,
        // which would also mutate the call's function type.
        CBClone->setCalledOperand(NewF.getCallee());
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                 << ore::NV("Call", CBClone) << " in clone "
                 << ore::NV("Caller", CBClone->getFunction())
                 << " assigned to call function clone "
                 << ore::NV("Callee", NewF.getCallee()));
      }
    };

    // Locate the summary for F.
    ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
    if (!TheFnVI)
      continue;

    auto *GVSummary =
        ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
    if (!GVSummary) {
      // Must have been imported; use the summary which matches the
      // definition (there may be multiple for a linkonce_odr).
      auto SrcModuleMD = F.getMetadata("thinlto_src_module");
      assert(SrcModuleMD &&
             "enable-import-metadata is needed to emit thinlto_src_module");
      StringRef SrcModule =
          dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
      for (auto &GVS : TheFnVI.getSummaryList()) {
        if (GVS->modulePath() == SrcModule) {
          GVSummary = GVS.get();
          break;
        }
      }
      assert(GVSummary && GVSummary->modulePath() == SrcModule);
    }

    // If this was an imported alias, skip it; it will be cloned in its
    // original module.
    if (isa<AliasSummary>(GVSummary))
      continue;

    auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());

    if (FS->allocs().empty() && FS->callsites().empty())
      continue;

    auto SI = FS->callsites().begin();
    auto AI = FS->allocs().begin();

    // To handle callsite infos synthesized for tail calls (which have missing
    // frames in the profiled context), map the callee VI to the synthesized
    // callsite info.
    DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
    // Iterate the callsites in reverse, since the synthesized ones are placed
    // at the end.
    for (auto CallsiteIt = FS->callsites().rbegin();
         CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
      auto &Callsite = *CallsiteIt;
      // Stop as soon as we see a non-synthesized callsite info; all entries
      // added for discovered tail calls have empty stack ids.
      if (!Callsite.StackIdIndices.empty())
        break;
      MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
    }

    // Saved indirect call profile info for use in ICP later.
    SmallVector<ICallAnalysisData> ICallAnalysisInfo;

    // Assume the instructions are in the same order as when the summary was
    // created, confirming correctness by matching the stack ids.
    for (auto &BB : F) {
      for (auto &I : BB) {
        auto *CB = dyn_cast<CallBase>(&I);
        // Same handling as when creating the module summary.
        if (!mayHaveMemprofSummary(CB))
          continue;

        auto *CalledValue = CB->getCalledOperand();
        auto *CalledFunction = CB->getCalledFunction();
        if (CalledValue && !CalledFunction) {
          CalledValue = CalledValue->stripPointerCasts();
          // Stripping pointer casts can reveal a called function.
          CalledFunction = dyn_cast<Function>(CalledValue);
        }
        // Check if this is an alias to a function, and if so get the called
        // aliasee for the checks below.
        if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
          assert(!CalledFunction &&
                 "Expected null called function in callsite for alias");
          CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
        }

        CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
            I.getMetadata(LLVMContext::MD_callsite));
        auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);

        // Include allocs that were already assigned a memprof function
        // attribute in the statistics.
        if (CB->getAttributes().hasFnAttr("memprof")) {
          assert(!MemProfMD);
          CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
              ? AllocTypeColdThinBackend++
              : AllocTypeNotColdThinBackend++;
          OrigAllocsThinBackend++;
          AllocVersionsThinBackend++;
          if (!MaxAllocVersionsThinBackend)
            MaxAllocVersionsThinBackend = 1;
          continue;
        }

        if (MemProfMD) {
          // Consult the next alloc node.
          assert(AI != FS->allocs().end());
          auto &AllocNode = *(AI++);

          if (VerifyCCG)
            checkAllocContextIds(AllocNode, MemProfMD, CallsiteContext,
                                 ImportSummary);

          // Perform cloning if not yet done.
          CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());

          OrigAllocsThinBackend++;
          AllocVersionsThinBackend += AllocNode.Versions.size();
          if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
            MaxAllocVersionsThinBackend = AllocNode.Versions.size();

          // A single non-cold version means this function wasn't considered
          // for cloning; the alloc stays None or default NotCold. Note a
          // single version may still be cold due to the
          // MinClonedColdBytePercent heuristic, so don't skip in that case.
          if (AllocNode.Versions.size() == 1 &&
              (AllocationType)AllocNode.Versions[0] != AllocationType::Cold) {
            assert((AllocationType)AllocNode.Versions[0] ==
                       AllocationType::NotCold ||
                   (AllocationType)AllocNode.Versions[0] ==
                       AllocationType::None);
            UnclonableAllocsThinBackend++;
            continue;
          }

          // All versions should have a singular allocation type.
          assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
            return Type == ((uint8_t)AllocationType::NotCold |
                            (uint8_t)AllocationType::Cold);
          }));

          // Update the allocation types per the summary info.
          for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
            // Ignore any that didn't get an assigned allocation type.
            if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
              continue;
            AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
            AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
                                            : AllocTypeNotColdThinBackend++;
            std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
            auto A = llvm::Attribute::get(F.getContext(), "memprof",
                                          AllocTypeString);
            CallBase *CBClone;
            // Copy 0 is the original function.
            if (!J)
              CBClone = CB;
            else
              CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
            CBClone->addFnAttr(A);
            ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute",
                                        CBClone)
                     << ore::NV("AllocationCall", CBClone) << " in clone "
                     << ore::NV("Caller", CBClone->getFunction())
                     << " marked with memprof allocation attribute "
                     << ore::NV("Attribute", AllocTypeString));
          }
        } else if (!CallsiteContext.empty()) {
          if (!CalledFunction) {
            // This is an indirect call; see if we have profile information
            // and whether any clones were recorded for the profiled targets.
            auto *CI = dyn_cast<CallInst>(CB);
            // We should have skipped inline assembly calls.
            assert(!CI || !CI->isInlineAsm());

            auto NumClones =
                recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);

            // Perform cloning if not yet done. Even if no ICP is needed, this
            // function may be the target of other cloned calls.
            if (NumClones)
              CloneFuncIfNeeded(NumClones);
          } else {
            // Consult the next callsite node.
            assert(SI != FS->callsites().end());
            auto &StackNode = *(SI++);

#ifndef NDEBUG
            // Sanity check that the stack ids match between the summary and
            // instruction metadata.
            auto StackIdIndexIter = StackNode.StackIdIndices.begin();
            for (auto StackId : CallsiteContext) {
              assert(StackIdIndexIter != StackNode.StackIdIndices.end());
              assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
                     StackId);
              StackIdIndexIter++;
            }
#endif

            CloneCallsite(StackNode, CB, CalledFunction);
          }
        } else if (CB->isTailCall() && CalledFunction) {
          // Locate any synthesized callsite info for the callee VI and use it
          // for cloning.
          ValueInfo CalleeVI =
              findValueInfoForFunc(*CalledFunction, M, ImportSummary, &F);
          if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
            auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
            assert(Callsite != MapTailCallCalleeVIToCallsite.end());
            CloneCallsite(Callsite->second, CB, CalledFunction);
          }
        }
      }
    }

    // Now do any promotion required for cloning.
    performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
  }

  // We skip some functions and instructions above, so remove all remaining
  // metadata in a single sweep here.
  for (auto &F : M) {
    // Memprof clones can be skipped since createFunctionClones already
    // strips the metadata from new clones.
    if (F.isDeclaration() || isMemProfClone(F))
      continue;
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(I))
          continue;
        I.setMetadata(LLVMContext::MD_memprof, nullptr);
        I.setMetadata(LLVMContext::MD_callsite, nullptr);
      }
    }
  }

  return Changed;
}
unsigned MemProfContextDisambiguation::recordICPInfo(
    CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
    ArrayRef<CallsiteInfo>::iterator &SI,
    SmallVector<ICallAnalysisData> &ICallAnalysisInfo) {
  // First see if we have profile information for this indirect call.
  uint32_t NumCandidates;
  uint64_t TotalCount;
  auto CandidateProfileData =
      ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
                                                          NumCandidates);
  if (CandidateProfileData.empty())
    return 0;

  // Iterate through all of the candidate profiled targets along with the
  // CallsiteInfo summary records synthesized for them when building the
  // index, and see if any clones were recorded.
  bool ICPNeeded = false;
  unsigned NumClones = 0;
  size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
  for (const auto &Candidate : CandidateProfileData) {
#ifndef NDEBUG
    auto CalleeValueInfo =
#endif
        ImportSummary->getValueInfo(Candidate.Value);
    // We might not have a ValueInfo if this is a distributed ThinLTO backend
    // that decided not to import this function.
    assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
    assert(SI != AllCallsites.end());
    auto &StackNode = *(SI++);
    // We only need to perform ICP if any clone of the indirect callsite
    // should call a cloned version of this profiled target.
    ICPNeeded |= llvm::any_of(StackNode.Clones,
                              [](unsigned CloneNo) { return CloneNo != 0; });
    // Every callsite in the same function should have been cloned the same
    // number of times.
    assert(!NumClones || NumClones == StackNode.Clones.size());
    NumClones = StackNode.Clones.size();
  }
  if (!ICPNeeded)
    return NumClones;
  // Save the information for ICP, which is performed later to avoid
  // disturbing the current function traversal.
  ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
                               TotalCount, CallsiteInfoStartIndex});
  return NumClones;
}
void MemProfContextDisambiguation::performICP(
    Module &M, ArrayRef<CallsiteInfo> AllCallsites,
    ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
    ArrayRef<ICallAnalysisData> ICallAnalysisInfo,
    OptimizationRemarkEmitter &ORE) {
  // For each recorded ICP candidate, promote the indirect call to a direct
  // call to the profiled target, then point each call clone at the
  // appropriate target clone.
  for (auto &Info : ICallAnalysisInfo) {
    auto *CB = Info.CB;
    auto CallsiteIndex = Info.CallsiteInfoStartIndex;
    auto TotalCount = Info.TotalCount;
    unsigned NumPromoted = 0;
    unsigned NumClones = 0;

    for (auto &Candidate : Info.CandidateProfileData) {
      auto &StackNode = AllCallsites[CallsiteIndex++];

      // All calls in the same function must have the same number of clones.
      assert(!NumClones || NumClones == StackNode.Clones.size());
      NumClones = StackNode.Clones.size();

      // See if the target is in the module. If it wasn't imported, the
      // profile could have been collected on a different version of the code,
      // and we must be conservative (as in the ICP pass).
      Function *TargetFunction = Symtab->getFunction(Candidate.Value);
      if (TargetFunction == nullptr ||
          // ... (additional target definition/declaration checks elided)
          !findValueInfoForFunc(*TargetFunction, M, ImportSummary)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)
                 << "Memprof cannot promote indirect call: target with md5sum "
                 << ore::NV("target md5sum", Candidate.Value) << " not found";
        });
        continue;
      }

      // Check if legal to promote.
      const char *Reason = nullptr;
      if (!isLegalToPromote(*CB, TargetFunction, &Reason)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
                 << "Memprof cannot promote indirect call to "
                 << ore::NV("TargetFunction", TargetFunction)
                 << " with count of " << ore::NV("TotalCount", TotalCount)
                 << ": " << Reason;
        });
        continue;
      }

      assert(!isMemProfClone(*TargetFunction));

      // Handle each call clone, applying ICP so that each clone directly
      // calls the specified callee clone, guarded by the appropriate ICP
      // condition.
      CallBase *CBClone = CB;
      for (unsigned J = 0; J < NumClones; J++) {
        // Copy 0 is the original function.
        if (J > 0)
          CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
        // Promote using the original name, so the comparison is against the
        // name in the vtable; then change the new direct call to call the
        // cloned function.
        auto &DirectCall =
            pgo::promoteIndirectCall(*CBClone, TargetFunction, Candidate.Count,
                                     TotalCount, isSamplePGO, &ORE);
        auto *TargetToUse = TargetFunction;
        // Call the original if this version calls the original version of its
        // callee.
        if (StackNode.Clones[J]) {
          TargetToUse = cast<Function>(
              M.getOrInsertFunction(
                   getMemProfFuncName(TargetFunction->getName(),
                                      StackNode.Clones[J]),
                   TargetFunction->getFunctionType())
                  .getCallee());
        }
        DirectCall.setCalledFunction(TargetToUse);
        // ... (noinline threshold handling elided in this listing)
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                 << ore::NV("Call", CBClone) << " in clone "
                 << ore::NV("Caller", CBClone->getFunction())
                 << " promoted and assigned to call function clone "
                 << ore::NV("Callee", TargetToUse));
      }

      // Update TotalCount (all clones should get the same count above).
      TotalCount -= Candidate.Count;
      NumPromoted++;
    }
    // Adjust the !prof metadata on all clones, now that we have the new
    // TotalCount and the number promoted.
    CallBase *CBClone = CB;
    for (unsigned J = 0; J < NumClones; J++) {
      // Copy 0 is the original function.
      if (J > 0)
        CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
      // First delete the old metadata.
      CBClone->setMetadata(LLVMContext::MD_prof, nullptr);
      // If all targets were promoted, no !prof metadata is needed; otherwise
      // re-annotate with the unpromoted records.
      if (TotalCount != 0)
        annotateValueSite(
            M, *CBClone,
            ArrayRef(Info.CandidateProfileData).slice(NumPromoted),
            TotalCount, IPVK_IndirectCallTarget, Info.NumCandidates);
    }
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
  if (DumpCCG) {
    dbgs() << "CCG before cloning:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("postbuild");

  if (VerifyCCG) {
    check();
  }

  identifyClones();

  mergeClones();

  if (DumpCCG) {
    dbgs() << "CCG after cloning:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("cloned");

  bool Changed = assignFunctions();

  if (DumpCCG) {
    dbgs() << "CCG after assigning function clones:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("clonefuncassign");

  if (MemProfReportHintedSizes)
    printTotalSizes(errs());

  return Changed;
}
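// Example usage (illustrative only, not part of this file): the pass can be
// scheduled through the new pass manager; the constructor signature matches
// the declaration above, with Summary defaulting to nullptr for regular LTO.
// The analysis manager setup is assumed to follow the usual PassBuilder
// registration.
//
//   ModulePassManager MPM;
//   MPM.addPass(MemProfContextDisambiguation(/*Summary=*/nullptr,
//                                            /*isSamplePGO=*/false));
//   MPM.run(M, MAM);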
bool MemProfContextDisambiguation::processModule(
    Module &M,
    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
  // If we have an import summary, the cloning decisions were made during the
  // thin link on the index; apply them and return.
  if (ImportSummary)
    return applyImport(M);

  // ...
  ModuleCallsiteContextGraph CCG(M, OREGetter);
  return CCG.process();
}

MemProfContextDisambiguation::MemProfContextDisambiguation(
    const ModuleSummaryIndex *Summary, bool isSamplePGO)
    : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
  // Check the dot graph printing options once up front.
  if (DotGraphScope == DotScope::Alloc && !AllocIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
  if (DotGraphScope == DotScope::Context &&
      !ContextIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=context requires -memprof-dot-context-id");
  if (DotGraphScope == DotScope::All && AllocIdForDot.getNumOccurrences() &&
      ContextIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
        "-memprof-dot-context-id");
  if (ImportSummary) {
    // The MemProfImportSummary option should only be used for testing the
    // ThinLTO backend via opt, in which case no summary comes from the pass
    // pipeline.
    assert(MemProfImportSummary.empty());
    return;
  }
  if (MemProfImportSummary.empty())
    return;

  auto ReadSummaryFile =
      errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
  if (!ReadSummaryFile) {
    logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
                          "Error loading file '" + MemProfImportSummary +
                              "': ");
    return;
  }
  auto ImportSummaryForTestingOrErr =
      getModuleSummaryIndex(**ReadSummaryFile);
  if (!ImportSummaryForTestingOrErr) {
    logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
                          "Error parsing file '" + MemProfImportSummary +
                              "': ");
    return;
  }
  ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
  ImportSummary = ImportSummaryForTesting.get();
}

PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
                                                    ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };
  if (!processModule(M, OREGetter))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

// Entry point for whole program analysis on the ThinLTO index.
void MemProfContextDisambiguation::run(
    ModuleSummaryIndex &Index,
    llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing) {
  // ...
  IndexCallsiteContextGraph CCG(Index, isPrevailing);
  CCG.process();
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
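A sketch of the clone-then-unique metadata editing pattern implied by the entries above (the operand index and string value are illustrative):
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

static MDNode *retagFirstOperand(LLVMContext &Ctx, MDNode *N) {
  TempMDNode Temp = N->clone();                 // temporary, not yet uniqued
  Temp->replaceOperandWith(0, MDString::get(Ctx, "cold"));
  return MDNode::replaceWithUniqued(std::move(Temp)); // re-uniqued result
}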
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
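A sketch of the ErrorOr handling this entails:
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static std::unique_ptr<MemoryBuffer> openFile(const Twine &Path) {
  auto BufOrErr = MemoryBuffer::getFile(Path);
  if (std::error_code EC = BufOrErr.getError()) {
    errs() << "cannot open file: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*BufOrErr);
}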
Class to hold module path string table and global value map, and encapsulate methods for operating on them.
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to the set of nodes.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
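A short sketch of the small-container operations listed above:
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static bool smallContainersDemo(int *A, int *B) {
  SmallVector<int, 8> Vec; // inline storage for 8 elements, heap beyond that
  Vec.reserve(4);
  Vec.push_back(1);
  Vec.emplace_back(2);

  SmallPtrSet<int *, 4> Seen; // pointer set optimized for few elements
  Seen.insert(A);
  return Seen.insert(B).second; // false if B was already present
}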
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresponding ThinLTO summary data structures (CallsiteInfo and MIBInfo).
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
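A hedged sketch of walking one MIB's stack ids with this helper, assuming the llvm::memprof::CallStack template instantiated over MDNode operands:
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;
using namespace llvm::memprof;

static void walkStackIds(const MDNode *MIB) {
  MDNode *StackNode = getMIBStackNode(MIB); // frame list of this MIB
  CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
  for (auto It = StackContext.begin(); It != StackContext.end(); ++It) {
    uint64_t StackId = *It; // hashed frame id, matches summary stack ids
    (void)StackId;
  }
}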
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all).
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
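A sketch combining the helpers above: union the alloc types across an allocation's MIB operands and, when the result is a single type, replace any ambiguous marker with the concrete attribute. The overall flow is illustrative, not the pass's exact logic:
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;
using namespace llvm::memprof;

static void hintAllocation(CallBase *CB, const MDNode *MemProfMD) {
  uint8_t AllocTypes = 0;
  for (const MDOperand &Op : MemProfMD->operands()) // one MIB per operand
    AllocTypes |= static_cast<uint8_t>(getMIBAllocType(cast<MDNode>(Op)));
  if (!hasSingleAllocType(AllocTypes))
    return; // still ambiguous; leave the metadata in place
  removeAnyExistingAmbiguousAttribute(CB);
  CB->addFnAttr(Attribute::get(
      CB->getContext(), "memprof",
      getAllocTypeAttributeString(static_cast<AllocationType>(AllocTypes))));
}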
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void stable_sort(R &&Range)
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
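A sketch tying the two promotion entries together, assuming promoteIndirectCall is the pgo-namespace declaration from PGOInstrumentation.h shown earlier (counts would come from the call's value profile):
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

static void tryPromote(CallBase &CB, Function *Target, uint64_t Count,
                       uint64_t TotalCount) {
  const char *Reason = nullptr;
  if (!isLegalToPromote(CB, Target, &Reason))
    return; // Reason describes why promotion is not possible
  pgo::promoteIndirectCall(CB, Target, Count, TotalCount,
                           /*AttachProfToDirectCall=*/true, /*ORE=*/nullptr);
}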
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
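A small sketch of these range helpers:
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static void rangeHelpersDemo() {
  SmallVector<int> V = {3, 1, 2};
  stable_sort(V);                    // range wrapper over std::stable_sort
  for (const auto &E : enumerate(V)) // pairs each element with its index
    (void)E.index(), (void)E.value();
  for (int X : drop_begin(V))        // skips the first element
    (void)X;
}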
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
InterleavedRange< Range > interleaved(const Range &R, StringRef Separator=", ", StringRef Prefix="", StringRef Suffix="")
Output range R as a sequence of interleaved elements.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summary analysis and the ThinLTO backend processing.
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non-empty.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
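A sketch of loading an index from a bitcode file, combining MemoryBuffer::getFile and logAllUnhandledErrors from the entries nearby:
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static std::unique_ptr<ModuleSummaryIndex> loadIndex(const Twine &Path) {
  auto BufOrErr = MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return nullptr; // see the getFile sketch above for error reporting
  Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
      getModuleSummaryIndex((*BufOrErr)->getMemBufferRef());
  if (!IndexOrErr) {
    logAllUnhandledErrors(IndexOrErr.takeError(), errs(), "parse failed: ");
    return nullptr;
  }
  return std::move(*IndexOrErr);
}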
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst with the value profile meta data.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
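A compact sketch of the in-place and value-returning set operations above:
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"

using namespace llvm;

static void setOpsDemo() {
  DenseSet<unsigned> A = {1, 2, 3};
  DenseSet<unsigned> B = {2, 3, 4};
  set_intersect(A, B);             // A := A ^ B, in place
  bool Grew = set_union(A, B);     // A := A u B, reports whether A changed
  set_subtract(A, B);              // A := A - B, in place
  bool Sub = set_is_subset(A, B);  // true iff A is contained in B
  auto I = set_intersection(A, B); // returns A ^ B, inputs untouched
  auto D = set_difference(A, B);   // returns A - B, inputs untouched
  (void)Grew; (void)Sub; (void)I; (void)D;
}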
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
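A sketch of the casting idioms listed above (isa for tests, cast when the type is known, dyn_cast and dyn_cast_or_null when it is not):
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static void castingDemo(Value *V) {
  if (isa<CallBase>(V))                 // type test only
    (void)cast<CallBase>(V);            // asserts if the test would fail
  if (auto *CB = dyn_cast<CallBase>(V)) // nullptr on mismatch
    (void)CB;
  Value *MaybeNull = nullptr;
  if (auto *CB = dyn_cast_or_null<CallBase>(MaybeNull)) // tolerates null
    (void)CB;
}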
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
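A sketch of cloning a function into its own module, in the spirit of materializing per-allocation-type versions; the ".memprof.1" suffix is illustrative:
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

static Function *makeClone(Function &F) {
  ValueToValueMapTy VMap;                    // filled with old->new mappings
  Function *Clone = CloneFunction(&F, VMap); // inserted next to F
  Clone->setName(F.getName() + ".memprof.1");
  return Clone;
}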
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
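The traits cluster above is what lets WriteGraph emit dot output for the context graph; a minimal self-contained sketch of the same wiring for a toy graph type:
#include "llvm/ADT/GraphTraits.h"
#include "llvm/Support/GraphWriter.h"
#include <string>
#include <vector>

struct ToyNode { std::vector<ToyNode *> Succs; };
struct ToyGraph { std::vector<ToyNode *> Nodes; }; // Nodes[0] is the entry

namespace llvm {
template <> struct GraphTraits<const ToyGraph *> {
  using NodeRef = const ToyNode *;
  using ChildIteratorType = std::vector<ToyNode *>::const_iterator;
  using nodes_iterator = std::vector<ToyNode *>::const_iterator;
  static NodeRef getEntryNode(const ToyGraph *G) { return G->Nodes.front(); }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
  static nodes_iterator nodes_begin(const ToyGraph *G) {
    return G->Nodes.begin();
  }
  static nodes_iterator nodes_end(const ToyGraph *G) { return G->Nodes.end(); }
};
template <>
struct DOTGraphTraits<const ToyGraph *> : DefaultDOTGraphTraits {
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
  static std::string getNodeLabel(const ToyNode *, const ToyGraph *) {
    return "node"; // real traits render ids, alloc types, context ids, etc.
  }
};
} // namespace llvm

static void emitDot(const ToyGraph &G, llvm::raw_ostream &OS) {
  llvm::WriteGraph(OS, &G); // uses the two specializations above
}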
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given value type T.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automatically dereferenced.