18#include "llvm/Config/llvm-config.h"
41#define DEBUG_TYPE "block-freq"
45 "check-bfi-unknown-block-queries",
47 cl::desc(
"Check if block frequency is queried for an unknown block "
48 "for debugging missed BFI updates"));
52 cl::desc(
"Apply an iterative post-processing to infer correct BFI counts"));
56 cl::desc(
"Iterative inference: maximum number of update iterations "
61 cl::desc(
"Iterative inference: delta convergence precision; smaller values "
62 "typically lead to better results at the cost of worsen runtime"));
71#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
83 for (
int Digits = 0; Digits < 16; ++Digits)
116struct DitheringDistributer {
120 DitheringDistributer(Distribution &Dist,
const BlockMass &Mass);
127DitheringDistributer::DitheringDistributer(Distribution &Dist,
130 RemWeight = Dist.Total;
134BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
135 assert(Weight &&
"invalid weight");
136 assert(Weight <= RemWeight);
137 BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
145void Distribution::add(
const BlockNode &Node, uint64_t Amount,
146 Weight::DistType
Type) {
147 assert(Amount &&
"invalid weight of 0");
148 uint64_t NewTotal =
Total + Amount;
151 bool IsOverflow = NewTotal <
Total;
152 assert(!(DidOverflow && IsOverflow) &&
"unexpected repeated overflow");
153 DidOverflow |= IsOverflow;
159 Weights.push_back(Weight(
Type, Node, Amount));
163 assert(OtherW.TargetNode.isValid());
168 assert(W.Type == OtherW.Type);
169 assert(W.TargetNode == OtherW.TargetNode);
170 assert(OtherW.Amount &&
"Expected non-zero weight");
171 if (W.Amount > W.Amount + OtherW.Amount)
175 W.Amount += OtherW.Amount;
180 llvm::sort(Weights, [](
const Weight &L,
const Weight &R) {
181 return L.TargetNode < R.TargetNode;
185 WeightList::iterator O = Weights.begin();
186 for (WeightList::const_iterator
I = O, L = O,
E = Weights.end();
I !=
E;
191 for (++L; L !=
E &&
I->TargetNode == L->TargetNode; ++L)
196 Weights.erase(O, Weights.end());
204 for (
const Weight &W : Weights)
208 if (Weights.size() == Combined.size())
213 Weights.reserve(Combined.size());
214 for (
const auto &
I : Combined)
215 Weights.push_back(
I.second);
220 if (Weights.size() > 128) {
233 return (
N >> Shift) + (UINT64_C(1) &
N >> (Shift - 1));
259 else if (
Total > UINT32_MAX)
268 return Sum + W.Amount;
270 "Expected total to be correct");
282 assert(W.TargetNode.isValid());
284 assert(W.Amount <= UINT32_MAX);
295 std::vector<FrequencyData>().swap(
Freqs);
297 std::vector<WorkingData>().swap(
Working);
305 std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
308 BFI.Freqs = std::move(SavedFreqs);
309 BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
327 auto debugSuccessor = [&](
const char *
Type) {
330 if (!isLoopHeader(Resolved))
332 if (Resolved != Succ)
336 (void)debugSuccessor;
339 if (isLoopHeader(Resolved)) {
345 if (
Working[Resolved.Index].getContainingLoop() != OuterLoop) {
351 if (Resolved < Pred) {
352 if (!isLoopHeader(Pred)) {
355 "unhandled irreducible control flow");
366 "unhandled irreducible control flow");
377 for (
const auto &
I :
Loop.Exits)
400 const Scaled64 InfiniteLoopScale(1, 12);
405 for (
auto &Mass :
Loop.BackedgeMass)
406 TotalBackedgeMass += Mass;
418 <<
" - scale = " <<
Loop.Scale <<
"\n");
427 if (
auto *
Loop =
Working[M.Index].getPackagedLoop())
431 Loop.IsPackaged =
true;
436 const DitheringDistributer &
D,
const BlockNode &
T,
438 dbgs() <<
" => assign " << M <<
" (" <<
D.RemMass <<
")";
442 dbgs() <<
" to " << BFI.getBlockName(
T);
454 DitheringDistributer
D(Dist, Mass);
460 Working[W.TargetNode.Index].getMass() += Taken;
466 assert(OuterLoop &&
"backedge or exit outside of loop");
483 const Scaled64 &Min,
const Scaled64 &Max) {
491 const unsigned MaxBits =
sizeof(Scaled64::DigitsType) * CHAR_BIT;
495 const unsigned Slack = 10;
496 Scaled64 ScalingFactor = Scaled64(1, MaxBits - Slack) / Max;
499 LLVM_DEBUG(
dbgs() <<
"float-to-int: min = " << Min <<
", max = " << Max
500 <<
", factor = " << ScalingFactor <<
"\n");
502 for (
size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
503 Scaled64
Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
504 BFI.Freqs[Index].Integer = std::max(UINT64_C(1),
Scaled.toInt<
uint64_t>());
505 LLVM_DEBUG(
dbgs() <<
" - " << BFI.getBlockName(Index) <<
": float = "
506 << BFI.Freqs[Index].Scaled <<
", scaled = " <<
Scaled
507 <<
", int = " << BFI.Freqs[Index].Integer <<
"\n");
517 <<
": mass = " <<
Loop.Mass <<
", scale = " <<
Loop.Scale
520 Loop.IsPackaged =
false;
526 for (
const BlockNode &
N :
Loop.Nodes) {
527 const auto &Working = BFI.Working[
N.Index];
528 Scaled64 &
F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
529 : BFI.Freqs[
N.Index].Scaled;
530 Scaled64 New =
Loop.Scale *
F;
539 for (
size_t Index = 0; Index <
Working.size(); ++Index)
551 for (
size_t Index = 0; Index <
Working.size(); ++Index) {
569 if (!
Node.isValid()) {
583std::optional<uint64_t>
586 bool AllowSynthetic)
const {
592 auto EntryCount =
F.getEntryCount(AllowSynthetic);
596 APInt BlockCount(128, EntryCount->getCount());
599 BlockCount *= BlockFreq;
602 BlockCount = (BlockCount + EntryFreq.
lshr(1)).
udiv(EntryFreq);
622 assert(
Node.isValid() &&
"Expected valid node");
640 for (
auto N : OuterLoop.
Nodes)
647 for (
uint32_t Index = 0; Index <
BFI.Working.size(); ++Index)
648 if (!
BFI.Working[Index].isPackaged())
660 if (OuterLoop && OuterLoop->
isHeader(Succ))
666 Irr.
Edges.push_back(&SuccIrr);
667 SuccIrr.
Edges.push_front(&Irr);
692 const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
693 LoopData::NodeList &Headers, LoopData::NodeList &Others) {
698 for (
const auto *
I : SCC)
702 auto &Irr = *
I->first;
703 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
709 Headers.push_back(Irr.Node);
715 assert(Headers.size() >= 2 &&
716 "Expected irreducible CFG; -loop-info is likely invalid");
717 if (Headers.size() == InSCC.
size()) {
724 for (
const auto &
I : InSCC) {
729 auto &Irr = *
I.first;
730 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
732 if (
P->Node < Irr.Node)
741 Headers.push_back(Irr.Node);
746 if (Headers.back() == Irr.Node)
751 Others.push_back(Irr.Node);
752 LLVM_DEBUG(
dbgs() <<
" => other = " << BFI.getBlockName(Irr.Node) <<
"\n");
760 LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
761 const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
765 LoopData::NodeList Headers;
766 LoopData::NodeList Others;
769 auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
770 Headers.end(), Others.begin(), Others.end());
773 for (
const auto &
N :
Loop->Nodes)
774 if (BFI.Working[
N.Index].isLoopHeader())
775 BFI.Working[
N.Index].Loop->Parent = &*
Loop;
777 BFI.Working[
N.Index].Loop = &*
Loop;
780iterator_range<std::list<LoopData>::iterator>
783 std::list<LoopData>::iterator Insert) {
784 assert((OuterLoop ==
nullptr) == (Insert ==
Loops.begin()));
785 auto Prev = OuterLoop ? std::prev(Insert) :
Loops.end();
806 for (
auto I = O, E = OuterLoop.
Nodes.
end();
I != E; ++
I)
807 if (!
Working[
I->Index].isPackaged())
813 assert(
Loop.isIrreducible() &&
"this only makes sense on irreducible loops");
826 auto &HeaderNode =
Loop.Nodes[
H];
827 auto &BackedgeMass =
Loop.BackedgeMass[
Loop.getHeaderIndex(HeaderNode)];
831 if (BackedgeMass.getMass() > 0)
832 Dist.
addLocal(HeaderNode, BackedgeMass.getMass());
837 DitheringDistributer
D(Dist, LoopMass);
840 <<
" to headers using above weights\n");
844 Working[W.TargetNode.Index].getMass() = Taken;
851 DitheringDistributer
D(Dist, LoopMass);
855 Working[W.TargetNode.Index].getMass() = Taken;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static void combineWeightsBySorting(WeightList &Weights)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeightsByHashing(WeightList &Weights)
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
static void combineWeight(Weight &W, const Weight &OtherW)
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
static void combineWeights(WeightList &Weights)
static char getHexDigit(int N)
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
This file defines the SmallString class.
Class for arbitrary precision integers.
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Base class for BlockFrequencyInfoImpl.
std::vector< WorkingData > Working
Loop data: see initializeLoops().
std::list< LoopData > Loops
Indexed information about loops.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
ScaledNumber< uint64_t > Scaled64
std::string getLoopName(const LoopData &Loop) const
bool isIrrLoopHeader(const BlockNode &Node)
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
bfi_detail::BlockMass BlockMass
void packageLoop(LoopData &Loop)
Package up a loop.
virtual std::string getBlockName(const BlockNode &Node) const
void finalizeMetrics()
Finalize frequency metrics.
void setBlockFreq(const BlockNode &Node, BlockFrequency Freq)
BlockFrequency getEntryFreq() const
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
void clear()
Clear all memory.
std::optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node, bool AllowSynthetic=false) const
BlockFrequency getBlockFreq(const BlockNode &Node) const
void distributeIrrLoopHeaderMass(Distribution &Dist)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
void unwrapLoops()
Unwrap loops.
std::optional< uint64_t > getProfileCountFromFreq(const Function &F, BlockFrequency Freq, bool AllowSynthetic=false) const
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Simple representation of a scaled number.
static ScaledNumber getLargest()
ScaledNumber inverse() const
static ScaledNumber getZero()
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
The instances of the Type class are immutable: once they are created, they are never changed.
raw_ostream & print(raw_ostream &OS) const
static BlockMass getEmpty()
static BlockMass getFull()
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
llvm::cl::opt< unsigned > IterativeBFIMaxIterationsPerBlock
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
llvm::cl::opt< bool > CheckBFIUnknownBlockQueries
llvm::cl::opt< double > IterativeBFIPrecision
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Representative of a block.
Distribution of unscaled probability weight.
void addBackedge(const BlockNode &Node, uint64_t Amount)
SmallVector< Weight, 4 > WeightList
WeightList Weights
Individual successor weights.
uint64_t Total
Sum of all weights.
void normalize()
Normalize the distribution.
void addExit(const BlockNode &Node, uint64_t Amount)
bool DidOverflow
Whether Total did overflow.
void addLocal(const BlockNode &Node, uint64_t Amount)
Stats about a block itself.
bool isHeader(const BlockNode &Node) const
ExitMap Exits
Successor edges (and weights).
bool isIrreducible() const
BlockNode getHeader() const
NodeList Nodes
Header and the members of the loop.
HeaderMassList BackedgeMass
Mass returned to each loop header.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
Unscaled probability weight.
const GraphT::IrrNode * NodeRef
static ChildIteratorType child_begin(NodeRef N)
static ChildIteratorType child_end(NodeRef N)
bfi_detail::IrreducibleGraph GraphT
GraphT::IrrNode::iterator ChildIteratorType
static NodeRef getEntryNode(const GraphT &G)
std::deque< const IrrNode * > Edges
Graph of irreducible control flow.
void addNodesInFunction()
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
BFIBase::BlockNode BlockNode
std::vector< IrrNode > Nodes
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
void addNode(const BlockNode &Node)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)