LoopDistribute.cpp
//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Loop Distribution Pass. Its main focus is to
// distribute loops that cannot be vectorized due to dependence cycles. It
// tries to isolate the offending dependences into a new loop allowing
// vectorization of the remaining parts.
//
// For dependence analysis, the pass uses the LoopVectorizer's
// LoopAccessAnalysis. Because this analysis presumes no change in the order of
// memory operations, special care is taken to preserve the lexical order of
// these operations.
//
// Similarly to the Vectorizer, the pass also supports loop versioning to
// run-time disambiguate potentially overlapping arrays.
//
//===----------------------------------------------------------------------===//

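// As an illustrative sketch (not part of the original header comment), a loop
// of the form
//
//   for (i = 0; i < n; i++) {
//     A[i + 1] = A[i] * B[i];   // dependence cycle: blocks vectorization
//     D[i] = E[i] * F[i];       // independent: vectorizable
//   }
//
// can be distributed into
//
//   for (i = 0; i < n; i++)
//     A[i + 1] = A[i] * B[i];
//
//   for (i = 0; i < n; i++)
//     D[i] = E[i] * F[i];
//
// isolating the dependence cycle in the first loop and leaving the second one
// vectorizable.
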
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <list>
#include <tuple>
#include <utility>

using namespace llvm;

#define LDIST_NAME "loop-distribute"
#define DEBUG_TYPE LDIST_NAME

/// @{
/// Metadata attribute names
static const char *const LLVMLoopDistributeFollowupAll =
    "llvm.loop.distribute.followup_all";
static const char *const LLVMLoopDistributeFollowupCoincident =
    "llvm.loop.distribute.followup_coincident";
static const char *const LLVMLoopDistributeFollowupSequential =
    "llvm.loop.distribute.followup_sequential";
static const char *const LLVMLoopDistributeFollowupFallback =
    "llvm.loop.distribute.followup_fallback";
/// @}
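
// Illustrative sketch of how these followup attributes are expected to be
// encoded (assumed from the generic loop transformation metadata scheme, not
// taken from this file): a followup attribute is a loop-metadata operand whose
// first element is one of the strings above and whose remaining elements are
// attached to the corresponding distributed loop, e.g.
//
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.distribute.followup_coincident", !2}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
//
// would request that the distributed loops without dependence cycles be
// marked for vectorization.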

static cl::opt<bool>
    LDistVerify("loop-distribute-verify", cl::Hidden,
                cl::desc("Turn on DominatorTree and LoopInfo verification "
                         "after Loop Distribution"),
                cl::init(false));

static cl::opt<bool> DistributeNonIfConvertible(
    "loop-distribute-non-if-convertible", cl::Hidden,
    cl::desc("Whether to distribute into a loop that may not be "
             "if-convertible by the loop vectorizer"),
    cl::init(false));

static cl::opt<unsigned> DistributeSCEVCheckThreshold(
    "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Distribution"));

static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
    "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
    cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Distribution for loop marked with #pragma clang loop "
             "distribute(enable)"));

static cl::opt<bool> EnableLoopDistribute(
    "enable-loop-distribute", cl::Hidden,
    cl::desc("Enable the new, experimental LoopDistribution Pass"),
    cl::init(false));

static const char *DistributedMetaData = "llvm.loop.isdistributed";

STATISTIC(NumLoopsDistributed, "Number of loops distributed");

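// A brief usage sketch (illustrative, not part of the original source): the
// pass is disabled by default and can be exercised either globally,
//
//   opt -passes=loop-distribute -enable-loop-distribute input.ll -S
//
// or per loop with '#pragma clang loop distribute(enable)', which is lowered
// to the "llvm.loop.distribute.enable" loop metadata consulted by
// LoopDistributeForLoop::setForced() below.
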
namespace {

/// Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
class InstPartition {
  using InstructionSet = SmallSetVector<Instruction *, 8>;

public:
  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
      : DepCycle(DepCycle), OrigLoop(L) {
    Set.insert(I);
  }

  /// Returns whether this partition contains a dependence cycle.
  bool hasDepCycle() const { return DepCycle; }

  /// Adds an instruction to this partition.
  void add(Instruction *I) { Set.insert(I); }

  /// Collection accessors.
  InstructionSet::iterator begin() { return Set.begin(); }
  InstructionSet::iterator end() { return Set.end(); }
  InstructionSet::const_iterator begin() const { return Set.begin(); }
  InstructionSet::const_iterator end() const { return Set.end(); }
  bool empty() const { return Set.empty(); }

  /// Moves this partition into \p Other. This partition becomes empty
  /// after this.
  void moveTo(InstPartition &Other) {
    Other.Set.insert_range(Set);
    Set.clear();
    Other.DepCycle |= DepCycle;
  }

  /// Populates the partition with the transitive closure of all the
  /// instructions that the seeded instructions depend on.
  void populateUsedSet() {
    // FIXME: We currently don't use control-dependence but simply include all
    // blocks (possibly empty at the end) and let simplifycfg mostly clean this
    // up.
    for (auto *B : OrigLoop->getBlocks())
      Set.insert(B->getTerminator());

    // Follow the use-def chains to form a transitive closure of all the
    // instructions that the originally seeded instructions depend on.
    SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      // Insert instructions from the loop that we depend on.
      for (Value *V : I->operand_values()) {
        auto *I = dyn_cast<Instruction>(V);
        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
          Worklist.push_back(I);
      }
    }
  }

  /// Clones the original loop.
  ///
  /// Updates LoopInfo and DominatorTree using the information that block \p
  /// LoopDomBB dominates the loop.
  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
                               unsigned Index, LoopInfo *LI,
                               DominatorTree *DT) {
    ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
                                          VMap, Twine(".ldist") + Twine(Index),
                                          LI, DT, ClonedLoopBlocks);
    return ClonedLoop;
  }

  /// The cloned loop. If this partition is mapped to the original loop,
  /// this is null.
  const Loop *getClonedLoop() const { return ClonedLoop; }

  /// Returns the loop where this partition ends up after distribution.
  /// If this partition is mapped to the original loop, that loop is returned.
  Loop *getDistributedLoop() const {
    return ClonedLoop ? ClonedLoop : OrigLoop;
  }

  /// The VMap that is populated by cloning and then used in
  /// remapInstructions to remap the cloned instructions.
  ValueToValueMapTy &getVMap() { return VMap; }

  /// Remaps the cloned instructions using VMap.
  void remapInstructions() {
    remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
  }

  /// Based on the set of instructions selected for this partition,
  /// removes the unnecessary ones.
  void removeUnusedInsts() {
    SmallVector<Instruction *, 8> Unused;

    for (auto *Block : OrigLoop->getBlocks())
      for (auto &Inst : *Block)
        if (!Set.count(&Inst)) {
          Instruction *NewInst = &Inst;
          if (!VMap.empty())
            NewInst = cast<Instruction>(VMap[NewInst]);

          assert(!isa<BranchInst>(NewInst) &&
                 "Branches are marked used early on");
          Unused.push_back(NewInst);
        }

    // Delete the instructions backwards; this reduces how many def-use and
    // use-def chains need to be updated.
    for (auto *Inst : reverse(Unused)) {
      salvageDebugInfo(*Inst);
      if (!Inst->use_empty())
        Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
      Inst->eraseFromParent();
    }
  }

  void print(raw_ostream &OS) const {
    OS << (DepCycle ? " (cycle)\n" : "\n");
    for (auto *I : Set)
      // Prefix with the block name.
      OS << "  " << I->getParent()->getName() << ":" << *I << "\n";
  }

  void printBlocks(raw_ostream &OS) const {
    for (auto *BB : getDistributedLoop()->getBlocks())
      OS << *BB;
  }

private:
  /// Instructions from OrigLoop selected for this partition.
  InstructionSet Set;

  /// Whether this partition contains a dependence cycle.
  bool DepCycle;

  /// The original loop.
  Loop *OrigLoop;

  /// The cloned loop. If this partition is mapped to the original loop,
  /// this is null.
  Loop *ClonedLoop = nullptr;

  /// The blocks of ClonedLoop including the preheader. If this
  /// partition is mapped to the original loop, this is empty.
  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;

  /// This gets populated once the set of instructions has been
  /// finalized. If this partition is mapped to the original loop, it is not
  /// set.
  ValueToValueMapTy VMap;
};

/// Holds the set of Partitions. It populates them, merges them and then
/// clones the loops.
class InstPartitionContainer {
  using InstToPartitionIdT = DenseMap<Instruction *, int>;

public:
  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
      : L(L), LI(LI), DT(DT) {}

  /// Returns the number of partitions.
  unsigned getSize() const { return PartitionContainer.size(); }

  /// Adds \p Inst into the current partition if that is marked to
  /// contain cycles. Otherwise start a new partition for it.
  void addToCyclicPartition(Instruction *Inst) {
    // If the current partition is non-cyclic, start a new one.
    if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
      PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
    else
      PartitionContainer.back().add(Inst);
  }

  /// Adds \p Inst into a partition that is not marked to contain
  /// dependence cycles.
  ///
  // Initially we isolate memory instructions into as many partitions as
  // possible, then later we may merge them back together.
  void addToNewNonCyclicPartition(Instruction *Inst) {
    PartitionContainer.emplace_back(Inst, L);
  }

  /// Merges adjacent non-cyclic partitions.
  ///
  /// The idea is that we currently only want to isolate the non-vectorizable
  /// partition. We could later allow more distribution among these partitions
  /// too.
  void mergeAdjacentNonCyclic() {
    mergeAdjacentPartitionsIf(
        [](const InstPartition *P) { return !P->hasDepCycle(); });
  }

  /// If a partition contains only conditional stores, we won't vectorize
  /// it. Try to merge it with a previous cyclic partition.
  void mergeNonIfConvertible() {
    mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
      if (Partition->hasDepCycle())
        return true;

      // Now, check if all stores are conditional in this partition.
      bool seenStore = false;

      for (auto *Inst : *Partition)
        if (isa<StoreInst>(Inst)) {
          seenStore = true;
          if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
            return false;
        }
      return seenStore;
    });
  }

  /// Merges the partitions according to various heuristics.
  void mergeBeforePopulating() {
    mergeAdjacentNonCyclic();
    if (DistributeNonIfConvertible)
      mergeNonIfConvertible();
  }

  /// Merges partitions in order to ensure that no loads are duplicated.
  ///
  /// We can't duplicate loads because that could potentially reorder them.
  /// LoopAccessAnalysis provides dependency information with the context that
  /// the order of memory operations is preserved.
  ///
  /// Returns true if any partitions were merged.
  bool mergeToAvoidDuplicatedLoads() {
    using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
    using ToBeMergedT = EquivalenceClasses<InstPartition *>;

    LoadToPartitionT LoadToPartition;
    ToBeMergedT ToBeMerged;

    // Step through the partitions and create equivalence between partitions
    // that contain the same load. Also put partitions in between them in the
    // same equivalence class to avoid reordering of memory operations.
    for (PartitionContainerT::iterator I = PartitionContainer.begin(),
                                       E = PartitionContainer.end();
         I != E; ++I) {
      auto *PartI = &*I;

      // If a load occurs in two partitions PartI and PartJ, merge all
      // partitions (PartI, PartJ] into PartI.
      for (Instruction *Inst : *PartI)
        if (isa<LoadInst>(Inst)) {
          bool NewElt;
          LoadToPartitionT::iterator LoadToPart;

          std::tie(LoadToPart, NewElt) =
              LoadToPartition.insert(std::make_pair(Inst, PartI));
          if (!NewElt) {
            LLVM_DEBUG(
                dbgs()
                << "LDist: Merging partitions due to this load in multiple "
                << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
                << *Inst << "\n");

            auto PartJ = I;
            do {
              --PartJ;
              ToBeMerged.unionSets(PartI, &*PartJ);
            } while (&*PartJ != LoadToPart->second);
          }
        }
    }
    if (ToBeMerged.empty())
      return false;

    // Merge the members of an equivalence class into its class leader. This
    // makes the members empty.
    for (const auto &C : ToBeMerged) {
      if (!C->isLeader())
        continue;

      auto PartI = C->getData();
      for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(*C)),
                                    ToBeMerged.member_end())) {
        PartJ->moveTo(*PartI);
      }
    }

    // Remove the empty partitions.
    PartitionContainer.remove_if(
        [](const InstPartition &P) { return P.empty(); });

    return true;
  }

  /// Sets up the mapping from instructions to partitions. If the
  /// instruction is duplicated across multiple partitions, set the entry to -1.
  void setupPartitionIdOnInstructions() {
    int PartitionID = 0;
    for (const auto &Partition : PartitionContainer) {
      for (Instruction *Inst : Partition) {
        bool NewElt;
        InstToPartitionIdT::iterator Iter;

        std::tie(Iter, NewElt) =
            InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
        if (!NewElt)
          Iter->second = -1;
      }
      ++PartitionID;
    }
  }

  /// Populates the partitions with everything that the seeding
  /// instructions require.
  void populateUsedSet() {
    for (auto &P : PartitionContainer)
      P.populateUsedSet();
  }

  /// This performs the main chunk of the work of cloning the loops for
  /// the partitions.
  void cloneLoops() {
    BasicBlock *OrigPH = L->getLoopPreheader();
    // At this point the predecessor of the preheader is either the memcheck
    // block or the top part of the original preheader.
    BasicBlock *Pred = OrigPH->getSinglePredecessor();
    assert(Pred && "Preheader does not have a single predecessor");
    BasicBlock *ExitBlock = L->getExitBlock();
    assert(ExitBlock && "No single exit block");
    Loop *NewLoop;

    assert(!PartitionContainer.empty() && "at least two partitions expected");
    // We're cloning the preheader along with the loop so we already made sure
    // it was empty.
    assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
           "preheader not empty");

    // Preserve the original loop ID for use after the transformation.
    MDNode *OrigLoopID = L->getLoopID();

    // Create a loop for each partition except the last. Clone the original
    // loop before PH along with adding a preheader for the cloned loop. Then
    // update PH to point to the newly added preheader.
    BasicBlock *TopPH = OrigPH;
    unsigned Index = getSize() - 1;
    for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
      NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);

      Part.getVMap()[ExitBlock] = TopPH;
      Part.remapInstructions();
      setNewLoopID(OrigLoopID, &Part);
      --Index;
      TopPH = NewLoop->getLoopPreheader();
    }
    Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);

    // Also set a new loop ID for the last loop.
    setNewLoopID(OrigLoopID, &PartitionContainer.back());

    // Now go in forward order and update the immediate dominator for the
    // preheaders with the exiting block of the previous loop. Dominance
    // within the loop is updated in cloneLoopWithPreheader.
    for (auto Curr = PartitionContainer.cbegin(),
              Next = std::next(PartitionContainer.cbegin()),
              E = PartitionContainer.cend();
         Next != E; ++Curr, ++Next)
      DT->changeImmediateDominator(
          Next->getDistributedLoop()->getLoopPreheader(),
          Curr->getDistributedLoop()->getExitingBlock());
  }

  /// Removes the dead instructions from the cloned loops.
  void removeUnusedInsts() {
    for (auto &Partition : PartitionContainer)
      Partition.removeUnusedInsts();
  }

  /// For each memory pointer, it computes the partitionId the pointer is
  /// used in.
  ///
  /// This returns an array of int where the I-th entry corresponds to I-th
  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
  /// partitions its entry is set to -1.
  SmallVector<int, 8>
  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
    const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();

    unsigned N = RtPtrCheck->Pointers.size();
    SmallVector<int, 8> PtrToPartitions(N);
    for (unsigned I = 0; I < N; ++I) {
      Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
      auto Instructions = LAI.getInstructionsForAccess(Ptr, /* IsWrite */ true);
      auto ReadInstructions =
          LAI.getInstructionsForAccess(Ptr, /* IsWrite */ false);
      Instructions.append(ReadInstructions.begin(), ReadInstructions.end());

      int &Partition = PtrToPartitions[I];
      // First set it to uninitialized.
      Partition = -2;
      for (Instruction *Inst : Instructions) {
        // Note that this could be -1 if Inst is duplicated across multiple
        // partitions.
        int ThisPartition = this->InstToPartitionId[Inst];
        if (Partition == -2)
          Partition = ThisPartition;
        // -1 means belonging to multiple partitions.
        else if (Partition == -1)
          break;
        else if (Partition != (int)ThisPartition)
          Partition = -1;
      }
      assert(Partition != -2 && "Pointer not belonging to any partition");
    }

    return PtrToPartitions;
  }

  void print(raw_ostream &OS) const {
    unsigned Index = 0;
    for (const auto &P : PartitionContainer) {
      OS << "LDist: Partition " << Index++ << ":";
      P.print(OS);
    }
  }

  void dump() const { print(dbgs()); }

#ifndef NDEBUG
  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const InstPartitionContainer &Partitions) {
    Partitions.print(OS);
    return OS;
  }
#endif

  void printBlocks(raw_ostream &OS) const {
    unsigned Index = 0;
    for (const auto &P : PartitionContainer) {
      OS << "LDist: Partition " << Index++ << ":";
      P.printBlocks(OS);
    }
  }

private:
  using PartitionContainerT = std::list<InstPartition>;

  /// List of partitions.
  PartitionContainerT PartitionContainer;

  /// Mapping from Instruction to partition Id. If the instruction
  /// belongs to multiple partitions the entry contains -1.
  InstToPartitionIdT InstToPartitionId;

  Loop *L;
  LoopInfo *LI;
  DominatorTree *DT;

  /// The control structure to merge adjacent partitions if both satisfy
  /// the \p Predicate.
  template <class UnaryPredicate>
  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
    InstPartition *PrevMatch = nullptr;
    for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
      auto DoesMatch = Predicate(&*I);
      if (PrevMatch == nullptr && DoesMatch) {
        PrevMatch = &*I;
        ++I;
      } else if (PrevMatch != nullptr && DoesMatch) {
        I->moveTo(*PrevMatch);
        I = PartitionContainer.erase(I);
      } else {
        PrevMatch = nullptr;
        ++I;
      }
    }
  }

  /// Assign new LoopIDs for the partition's cloned loop.
  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
    std::optional<MDNode *> PartitionID = makeFollowupLoopID(
        OrigLoopID,
        {LLVMLoopDistributeFollowupAll,
         Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
                             : LLVMLoopDistributeFollowupCoincident});
    if (PartitionID) {
      Loop *NewLoop = Part->getDistributedLoop();
      NewLoop->setLoopID(*PartitionID);
    }
  }
};

/// For each memory instruction, this class maintains the difference between
/// the number of unsafe dependences that start out from this instruction and
/// those that end here.
///
/// By traversing the memory instructions in program order and accumulating
/// this number, we know whether any unsafe dependence crosses over a program
/// point.
class MemoryInstructionDependences {
  using Dependence = MemoryDepChecker::Dependence;

public:
  struct Entry {
    Instruction *Inst;
    unsigned NumUnsafeDependencesStartOrEnd = 0;

    Entry(Instruction *Inst) : Inst(Inst) {}
  };

  using AccessesType = SmallVector<Entry, 8>;

  AccessesType::const_iterator begin() const { return Accesses.begin(); }
  AccessesType::const_iterator end() const { return Accesses.end(); }

  MemoryInstructionDependences(
      const SmallVectorImpl<Instruction *> &Instructions,
      const SmallVectorImpl<Dependence> &Dependences) {
    Accesses.append(Instructions.begin(), Instructions.end());

    LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
    for (const auto &Dep : Dependences)
      if (Dep.isPossiblyBackward()) {
        // Note that the designations source and destination follow the program
        // order, i.e. source is always first. (The direction is given by the
        // DepType.)
        ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
        --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;

        LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
      }
  }

private:
  AccessesType Accesses;
};

/// The actual class performing the per-loop work.
class LoopDistributeForLoop {
public:
  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
                        ScalarEvolution *SE, LoopAccessInfoManager &LAIs,
                        OptimizationRemarkEmitter *ORE)
      : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
    setForced();
  }

  /// Try to distribute an inner-most loop.
  bool processLoop() {
    assert(L->isInnermost() && "Only process inner loops.");

    LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
                      << L->getHeader()->getParent()->getName() << "' from "
                      << L->getLocStr() << "\n");

    // Having a single exit block implies there's also one exiting block.
    if (!L->getExitBlock())
      return fail("MultipleExitBlocks", "multiple exit blocks");
    if (!L->isLoopSimplifyForm())
      return fail("NotLoopSimplifyForm",
                  "loop is not in loop-simplify form");
    if (!L->isRotatedForm())
      return fail("NotBottomTested", "loop is not bottom tested");

    BasicBlock *PH = L->getLoopPreheader();

    LAI = &LAIs.getInfo(*L);

    // Currently, we only distribute to isolate the part of the loop with
    // dependence cycles to enable partial vectorization.
    if (LAI->canVectorizeMemory())
      return fail("MemOpsCanBeVectorized",
                  "memory operations are safe for vectorization");

    auto *Dependences = LAI->getDepChecker().getDependences();
    if (!Dependences || Dependences->empty())
      return fail("NoUnsafeDeps", "no unsafe dependences to isolate");

    LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
                      << L->getHeader()->getName() << "\n");

    InstPartitionContainer Partitions(L, LI, DT);

    // First, go through each memory operation and assign them to consecutive
    // partitions (the order of partitions follows program order). Put those
    // with unsafe dependences into a "cyclic" partition, otherwise put each
    // store in its own "non-cyclic" partition (we'll merge these later).
    //
    // Note that a memory operation (e.g. Load2 below) at a program point that
    // has an unsafe dependence (Store3->Load1) spanning over it must be
    // included in the same cyclic partition as the dependent operations. This
    // is to preserve the original program order after distribution. E.g.:
    //
    //              NumUnsafeDependencesStartOrEnd  NumUnsafeDependencesActive
    //  Load1  -.                  1                         0->1
    //  Load2   | /Unsafe/         0                         1
    //  Store3 -'                 -1                         1->0
    //  Load4                      0                         0
    //
    // NumUnsafeDependencesActive > 0 indicates this situation and in this case
    // we just keep assigning to the same cyclic partition until
    // NumUnsafeDependencesActive reaches 0.
    const MemoryDepChecker &DepChecker = LAI->getDepChecker();
    MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
                                     *Dependences);

    int NumUnsafeDependencesActive = 0;
    for (const auto &InstDep : MID) {
      Instruction *I = InstDep.Inst;
      // We update NumUnsafeDependencesActive post-instruction, so catch the
      // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
      if (NumUnsafeDependencesActive ||
          InstDep.NumUnsafeDependencesStartOrEnd > 0)
        Partitions.addToCyclicPartition(I);
      else
        Partitions.addToNewNonCyclicPartition(I);
      NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
      assert(NumUnsafeDependencesActive >= 0 &&
             "Negative number of dependences active");
    }

    // Add partitions for values used outside. These partitions can be out of
    // order from the original program order. This is OK because if the
    // partition uses a load we will merge this partition with the original
    // partition of the load that we set up in the previous loop (see
    // mergeToAvoidDuplicatedLoads).
    auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
    for (auto *Inst : DefsUsedOutside)
      Partitions.addToNewNonCyclicPartition(Inst);

    LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
    if (Partitions.getSize() < 2)
      return fail("CantIsolateUnsafeDeps",
                  "cannot isolate unsafe dependencies");

    // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
    // should be able to vectorize these together.
    Partitions.mergeBeforePopulating();
    LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
    if (Partitions.getSize() < 2)
      return fail("CantIsolateUnsafeDeps",
                  "cannot isolate unsafe dependencies");

    // Now, populate the partitions with non-memory operations.
    Partitions.populateUsedSet();
    LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);

    // In order to preserve original lexical order for loads, keep them in the
    // partition that we set up in the MemoryInstructionDependences loop.
    if (Partitions.mergeToAvoidDuplicatedLoads()) {
      LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
                        << Partitions);
      if (Partitions.getSize() < 2)
        return fail("CantIsolateUnsafeDeps",
                    "cannot isolate unsafe dependencies");
    }

    // Don't distribute the loop if we need too many SCEV run-time checks, or
    // any checks at all if run-time checks are illegal here.
    const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
    if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
      return fail("RuntimeCheckWithConvergent",
                  "may not insert runtime check with convergent operation");
    }
    if (Pred.getComplexity() > (IsForced.value_or(false)
                                    ? PragmaDistributeSCEVCheckThreshold
                                    : DistributeSCEVCheckThreshold))
      return fail("TooManySCEVRuntimeChecks",
                  "too many SCEV run-time checks needed.\n");

    if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
      return fail("HeuristicDisabled", "distribution heuristic disabled");

    LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
                      << L->getHeader()->getName() << "\n");
    // We're done forming the partitions; set up the reverse mapping from
    // instructions to partitions.
    Partitions.setupPartitionIdOnInstructions();

    // If we need run-time checks, version the loop now.
    auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
    const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
    const auto &AllChecks = RtPtrChecking->getChecks();
    auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
                                                  RtPtrChecking);

    if (LAI->hasConvergentOp() && !Checks.empty()) {
      return fail("RuntimeCheckWithConvergent",
                  "may not insert runtime check with convergent operation");
    }

    // To keep things simple, have an empty preheader before we version or
    // clone the loop. (Also split if this has no predecessor, i.e. entry,
    // because we rely on PH having a predecessor.)
    if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
      SplitBlock(PH, PH->getTerminator(), DT, LI);

    if (!Pred.isAlwaysTrue() || !Checks.empty()) {
      assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");

      MDNode *OrigLoopID = L->getLoopID();

      LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
      LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
      LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
      LVer.versionLoop(DefsUsedOutside);
      LVer.annotateLoopWithNoAlias();

      // The unversioned loop will not be changed, so we inherit all attributes
      // from the original loop, but remove the loop distribution metadata to
      // avoid distributing it again.
      MDNode *UnversionedLoopID = *makeFollowupLoopID(
          OrigLoopID,
          {LLVMLoopDistributeFollowupAll, LLVMLoopDistributeFollowupFallback},
          "llvm.loop.distribute.", true);
      LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
      addStringMetadataToLoop(LVer.getNonVersionedLoop(), DistributedMetaData,
                              true);
    }

    // Create identical copies of the original loop for each partition and hook
    // them up sequentially.
    Partitions.cloneLoops();

    // Now, remove the instructions from each loop that don't belong to that
    // partition.
    Partitions.removeUnusedInsts();
    LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
    LLVM_DEBUG(Partitions.printBlocks(dbgs()));

    if (LDistVerify) {
      LI->verify(*DT);
      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
    }

    ++NumLoopsDistributed;
    // Report the success.
    ORE->emit([&]() {
      return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
                                L->getHeader())
             << "distributed loop";
    });
    return true;
  }

  /// Provide diagnostics then \return with false.
  bool fail(StringRef RemarkName, StringRef Message) {
    LLVMContext &Ctx = F->getContext();
    bool Forced = isForced().value_or(false);

    LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");

    // With -Rpass-missed, report that distribution failed.
    ORE->emit([&]() {
      return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
                                      L->getStartLoc(), L->getHeader())
             << "loop not distributed: use -Rpass-analysis=loop-distribute for "
                "more info";
    });

    // With -Rpass-analysis, report why. This is on by default if distribution
    // was requested explicitly.
    ORE->emit(OptimizationRemarkAnalysis(
                  Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
                  RemarkName, L->getStartLoc(), L->getHeader())
              << "loop not distributed: " << Message);

    // Also issue a warning if distribution was requested explicitly but it
    // failed.
    if (Forced)
      Ctx.diagnose(DiagnosticInfoOptimizationFailure(
          *F, L->getStartLoc(), "loop not distributed: failed "
                                "explicitly specified loop distribution"));

    return false;
  }

  /// Returns whether distribution was forced to be enabled or disabled for
  /// the loop.
  ///
  /// If the optional has a value, it indicates whether distribution was forced
  /// to be enabled (true) or disabled (false). If the optional has no value
  /// distribution was not forced either way.
  const std::optional<bool> &isForced() const { return IsForced; }

private:
  /// Filter out checks between pointers from the same partition.
  ///
  /// \p PtrToPartition contains the partition number for pointers. Partition
  /// number -1 means that the pointer is used in multiple partitions. In this
  /// case we can't safely omit the check.
  SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
      const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
      const SmallVectorImpl<int> &PtrToPartition,
      const RuntimePointerChecking *RtPtrChecking) {
    SmallVector<RuntimePointerCheck, 4> Checks;

    copy_if(AllChecks, std::back_inserter(Checks),
            [&](const RuntimePointerCheck &Check) {
              for (unsigned PtrIdx1 : Check.first->Members)
                for (unsigned PtrIdx2 : Check.second->Members)
                  // Only include this check if there is a pair of pointers
                  // that require checking and the pointers fall into
                  // separate partitions.
                  //
                  // (Note that we already know at this point that the two
                  // pointer groups need checking but it doesn't follow
                  // that each pair of pointers within the two groups need
                  // checking as well.
                  //
                  // In other words we don't want to include a check just
                  // because there is a pair of pointers between the two
                  // pointer groups that require checks and a different
                  // pair whose pointers fall into different partitions.)
                  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
                      !RuntimePointerChecking::arePointersInSamePartition(
                          PtrToPartition, PtrIdx1, PtrIdx2))
                    return true;
              return false;
            });

    return Checks;
  }

  /// Check whether the loop metadata is forcing distribution to be
  /// enabled/disabled.
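  ///
  /// A sketch of the metadata shape consumed here (illustrative, assuming the
  /// usual loop-metadata encoding):
  ///   br i1 %cond, label %header, label %exit, !llvm.loop !0
  ///   !0 = distinct !{!0, !1}
  ///   !1 = !{!"llvm.loop.distribute.enable", i1 true}
  /// forces distribution on for this loop; an i1 false operand forces it off.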
  void setForced() {
    std::optional<const MDOperand *> Value =
        findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
    if (!Value)
      return;

    const MDOperand *Op = *Value;
    assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
    IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
  }

  Loop *L;
  Function *F;

  // Analyses used.
  LoopInfo *LI;
  const LoopAccessInfo *LAI = nullptr;
  DominatorTree *DT;
  ScalarEvolution *SE;
  LoopAccessInfoManager &LAIs;
  OptimizationRemarkEmitter *ORE;

  /// Indicates whether distribution is forced to be enabled/disabled for
  /// the loop.
  ///
  /// If the optional has a value, it indicates whether distribution was forced
  /// to be enabled (true) or disabled (false). If the optional has no value
  /// distribution was not forced either way.
  std::optional<bool> IsForced;
};

} // end anonymous namespace

static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
                    ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
                    LoopAccessInfoManager &LAIs) {
  // Build up a worklist of inner-loops to distribute. This is necessary as the
  // act of distributing a loop creates new loops and can invalidate iterators
  // across the loops.
  SmallVector<Loop *, 8> Worklist;

  for (Loop *TopLevelLoop : *LI)
    for (Loop *L : depth_first(TopLevelLoop))
      // We only handle inner-most loops.
      if (L->isInnermost())
        Worklist.push_back(L);

  // Now walk the identified inner loops.
  bool Changed = false;
  for (Loop *L : Worklist) {
    LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);

    // Do not reprocess loops we already distributed.
    if (getOptionalBoolLoopAttribute(L, DistributedMetaData).value_or(false)) {
      LLVM_DEBUG(
          dbgs() << "LDist: Distributed loop guarded for reprocessing\n");
      continue;
    }

    // If distribution was forced for the specific loop to be
    // enabled/disabled, follow that. Otherwise use the global flag.
    if (LDL.isForced().value_or(EnableLoopDistribute))
      Changed |= LDL.processLoop();
  }

  // Process each loop nest in the function.
  return Changed;
}

PreservedAnalyses LoopDistributePass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);

  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<LoopAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}