LLVM 22.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Module.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
51#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <optional>
67#include <utility>
68
69#define DEBUG_TYPE "simple-loop-unswitch"
70
71using namespace llvm;
72using namespace llvm::PatternMatch;
73
74STATISTIC(NumBranches, "Number of branches unswitched");
75STATISTIC(NumSwitches, "Number of switches unswitched");
76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial, "Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83 "Number of invariant conditions injected and unswitched");
84
86 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
87 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
89
90static cl::opt<int>
91 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
92 cl::desc("The cost threshold for unswitching a loop."));
93
95 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
96 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
99 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
100 cl::desc("Toplevel siblings divisor for cost multiplier."));
102 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
103 cl::desc("Outer loop size divisor for cost multiplier."));
105 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
106 cl::desc("Number of unswitch candidates that are ignored when calculating "
107 "cost multiplier."));
109 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
110 cl::desc("If enabled, simple loop unswitching will also consider "
111 "llvm.experimental.guard intrinsics as unswitch candidates."));
113 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
114 cl::init(false), cl::Hidden,
115 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
116 "null checks to save time analyzing if we can keep it."));
118 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
119 cl::desc("Max number of memory uses to explore during "
120 "partial unswitching analysis"),
121 cl::init(100), cl::Hidden);
123 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
124 cl::desc("If enabled, the freeze instruction will be added to condition "
125 "of loop unswitch to prevent miscompilation."));
126
128 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
129 cl::desc("Whether we should inject new invariants and unswitch them to "
130 "eliminate some existing (non-invariant) conditions."),
131 cl::init(true));
132
134 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
135 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
136 "unswitch on them to eliminate branches that are "
137 "not-taken 1/<this option> times or less."),
138 cl::init(16));
139
141namespace {
142struct CompareDesc {
143 BranchInst *Term;
144 Value *Invariant;
145 BasicBlock *InLoopSucc;
146
147 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
148 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
149};
150
151struct InjectedInvariant {
152 ICmpInst::Predicate Pred;
153 Value *LHS;
154 Value *RHS;
155 BasicBlock *InLoopSucc;
156
157 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
158 BasicBlock *InLoopSucc)
159 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
160};
161
162struct NonTrivialUnswitchCandidate {
163 Instruction *TI = nullptr;
164 TinyPtrVector<Value *> Invariants;
165 std::optional<InstructionCost> Cost;
166 std::optional<InjectedInvariant> PendingInjection;
167 NonTrivialUnswitchCandidate(
168 Instruction *TI, ArrayRef<Value *> Invariants,
169 std::optional<InstructionCost> Cost = std::nullopt,
170 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
171 : TI(TI), Invariants(Invariants), Cost(Cost),
172 PendingInjection(PendingInjection) {};
173
174 bool hasPendingInjection() const { return PendingInjection.has_value(); }
175};
176} // end anonymous namespace.
177
178// Helper to skip (select x, true, false), which matches both a logical AND and
179// OR and can confuse code that tries to determine if \p Cond is either a
180// logical AND or OR but not both.
182 Value *CondNext;
183 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
184 Cond = CondNext;
185 return Cond;
186}
187
188/// Collect all of the loop invariant input values transitively used by the
189/// homogeneous instruction graph from a given root.
190///
191/// This essentially walks from a root recursively through loop variant operands
192/// which have perform the same logical operation (AND or OR) and finds all
193/// inputs which are loop invariant. For some operations these can be
194/// re-associated and unswitched out of the loop entirely.
197 const LoopInfo &LI) {
198 assert(!L.isLoopInvariant(&Root) &&
199 "Only need to walk the graph if root itself is not invariant.");
200 TinyPtrVector<Value *> Invariants;
201
202 bool IsRootAnd = match(&Root, m_LogicalAnd());
203 bool IsRootOr = match(&Root, m_LogicalOr());
204
205 // Build a worklist and recurse through operators collecting invariants.
208 Worklist.push_back(&Root);
209 Visited.insert(&Root);
210 do {
211 Instruction &I = *Worklist.pop_back_val();
212 for (Value *OpV : I.operand_values()) {
213 // Skip constants as unswitching isn't interesting for them.
214 if (isa<Constant>(OpV))
215 continue;
216
217 // Add it to our result if loop invariant.
218 if (L.isLoopInvariant(OpV)) {
219 Invariants.push_back(OpV);
220 continue;
221 }
222
223 // If not an instruction with the same opcode, nothing we can do.
225
226 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
227 (IsRootOr && match(OpI, m_LogicalOr())))) {
228 // Visit this operand.
229 if (Visited.insert(OpI).second)
230 Worklist.push_back(OpI);
231 }
232 }
233 } while (!Worklist.empty());
234
235 return Invariants;
236}
237
238static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
239 Constant &Replacement) {
240 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
241
242 // Replace uses of LIC in the loop with the given constant.
243 // We use make_early_inc_range as set invalidates the iterator.
244 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
245 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
246
247 // Replace this use within the loop body.
248 if (UserI && L.contains(UserI))
249 U.set(&Replacement);
250 }
251}
252
253/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
254/// incoming values along this edge.
256 const BasicBlock &ExitingBB,
257 const BasicBlock &ExitBB) {
258 for (const Instruction &I : ExitBB) {
259 auto *PN = dyn_cast<PHINode>(&I);
260 if (!PN)
261 // No more PHIs to check.
262 return true;
263
264 // If the incoming value for this edge isn't loop invariant the unswitch
265 // won't be trivial.
266 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
267 return false;
268 }
269 llvm_unreachable("Basic blocks should never be empty!");
270}
271
272/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
273/// end of \p BB and conditionally branch on the copied condition. We only
274/// branch on a single value.
276 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
277 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
278 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
279 IRBuilder<> IRB(&BB);
281
282 SmallVector<Value *> FrozenInvariants;
283 for (Value *Inv : Invariants) {
284 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
285 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
286 FrozenInvariants.push_back(Inv);
287 }
288
289 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
290 : IRB.CreateAnd(FrozenInvariants);
291 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
292 Direction ? &NormalSucc : &UnswitchedSucc);
293}
294
295/// Copy a set of loop invariant values, and conditionally branch on them.
297 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
298 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
299 MemorySSAUpdater *MSSAU) {
301 for (auto *Val : reverse(ToDuplicate)) {
302 Instruction *Inst = cast<Instruction>(Val);
303 Instruction *NewInst = Inst->clone();
304
305 if (const DebugLoc &DL = Inst->getDebugLoc())
306 mapAtomInstance(DL, VMap);
307
308 NewInst->insertInto(&BB, BB.end());
309 RemapInstruction(NewInst, VMap,
311 VMap[Val] = NewInst;
312
313 if (!MSSAU)
314 continue;
315
316 MemorySSA *MSSA = MSSAU->getMemorySSA();
317 if (auto *MemUse =
319 auto *DefiningAccess = MemUse->getDefiningAccess();
320 // Get the first defining access before the loop.
321 while (L.contains(DefiningAccess->getBlock())) {
322 // If the defining access is a MemoryPhi, get the incoming
323 // value for the pre-header as defining access.
324 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
325 DefiningAccess =
326 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
327 else
328 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
329 }
330 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
331 NewInst->getParent(),
333 }
334 }
335
336 IRBuilder<> IRB(&BB);
338 Value *Cond = VMap[ToDuplicate[0]];
339 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
340 Direction ? &NormalSucc : &UnswitchedSucc);
341}
342
343/// Rewrite the PHI nodes in an unswitched loop exit basic block.
344///
345/// Requires that the loop exit and unswitched basic block are the same, and
346/// that the exiting block was a unique predecessor of that block. Rewrites the
347/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
348/// PHI nodes from the old preheader that now contains the unswitched
349/// terminator.
351 BasicBlock &OldExitingBB,
352 BasicBlock &OldPH) {
353 for (PHINode &PN : UnswitchedBB.phis()) {
354 // When the loop exit is directly unswitched we just need to update the
355 // incoming basic block. We loop to handle weird cases with repeated
356 // incoming blocks, but expect to typically only have one operand here.
357 for (auto i : seq<int>(0, PN.getNumOperands())) {
358 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
359 "Found incoming block different from unique predecessor!");
360 PN.setIncomingBlock(i, &OldPH);
361 }
362 }
363}
364
365/// Rewrite the PHI nodes in the loop exit basic block and the split off
366/// unswitched block.
367///
368/// Because the exit block remains an exit from the loop, this rewrites the
369/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
370/// nodes into the unswitched basic block to select between the value in the
371/// old preheader and the loop exit.
373 BasicBlock &UnswitchedBB,
374 BasicBlock &OldExitingBB,
375 BasicBlock &OldPH,
376 bool FullUnswitch) {
377 assert(&ExitBB != &UnswitchedBB &&
378 "Must have different loop exit and unswitched blocks!");
379 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
380 for (PHINode &PN : ExitBB.phis()) {
381 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
382 PN.getName() + ".split");
383 NewPN->insertBefore(InsertPt);
384
385 // Walk backwards over the old PHI node's inputs to minimize the cost of
386 // removing each one. We have to do this weird loop manually so that we
387 // create the same number of new incoming edges in the new PHI as we expect
388 // each case-based edge to be included in the unswitched switch in some
389 // cases.
390 // FIXME: This is really, really gross. It would be much cleaner if LLVM
391 // allowed us to create a single entry for a predecessor block without
392 // having separate entries for each "edge" even though these edges are
393 // required to produce identical results.
394 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
395 if (PN.getIncomingBlock(i) != &OldExitingBB)
396 continue;
397
398 Value *Incoming = PN.getIncomingValue(i);
399 if (FullUnswitch)
400 // No more edge from the old exiting block to the exit block.
401 PN.removeIncomingValue(i);
402
403 NewPN->addIncoming(Incoming, &OldPH);
404 }
405
406 // Now replace the old PHI with the new one and wire the old one in as an
407 // input to the new one.
408 PN.replaceAllUsesWith(NewPN);
409 NewPN->addIncoming(&PN, &ExitBB);
410 }
411}
412
413/// Hoist the current loop up to the innermost loop containing a remaining exit.
414///
415/// Because we've removed an exit from the loop, we may have changed the set of
416/// loops reachable and need to move the current loop up the loop nest or even
417/// to an entirely separate nest.
418static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
419 DominatorTree &DT, LoopInfo &LI,
420 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
421 // If the loop is already at the top level, we can't hoist it anywhere.
422 Loop *OldParentL = L.getParentLoop();
423 if (!OldParentL)
424 return;
425
 // NOTE(review): the declaration of the `Exits` container (doxygen line 426,
 // presumably a SmallVector of exit BasicBlocks) was lost in this scrape —
 // confirm against the upstream source before relying on this listing.
427 L.getExitBlocks(Exits);
428 Loop *NewParentL = nullptr;
 // Keep the innermost (deepest-nested) loop containing any exit block; that
 // loop — or null, meaning top level — becomes the new parent.
429 for (auto *ExitBB : Exits)
430 if (Loop *ExitL = LI.getLoopFor(ExitBB))
431 if (!NewParentL || NewParentL->contains(ExitL))
432 NewParentL = ExitL;
433
 // Nothing to do if the nesting relationship did not actually change.
434 if (NewParentL == OldParentL)
435 return;
436
437 // The new parent loop (if different) should always contain the old one.
438 if (NewParentL)
439 assert(NewParentL->contains(OldParentL) &&
440 "Can only hoist this loop up the nest!");
441
442 // The preheader will need to move with the body of this loop. However,
443 // because it isn't in this loop we also need to update the primary loop map.
444 assert(OldParentL == LI.getLoopFor(&Preheader) &&
445 "Parent loop of this loop should contain this loop's preheader!");
446 LI.changeLoopFor(&Preheader, NewParentL);
447
448 // Remove this loop from its old parent.
449 OldParentL->removeChildLoop(&L);
450
451 // Add the loop either to the new parent or as a top-level loop.
452 if (NewParentL)
453 NewParentL->addChildLoop(&L);
454 else
455 LI.addTopLevelLoop(&L);
456
457 // Remove this loops blocks from the old parent and every other loop up the
458 // nest until reaching the new parent. Also update all of these
459 // no-longer-containing loops to reflect the nesting change.
460 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
461 OldContainingL = OldContainingL->getParentLoop()) {
 // Drop the preheader and every block of L from this ancestor's block list
 // and block set — they are no longer part of it.
462 llvm::erase_if(OldContainingL->getBlocksVector(),
463 [&](const BasicBlock *BB) {
464 return BB == &Preheader || L.contains(BB);
465 });
466
467 OldContainingL->getBlocksSet().erase(&Preheader);
468 for (BasicBlock *BB : L.blocks())
469 OldContainingL->getBlocksSet().erase(BB);
470
471 // Because we just hoisted a loop out of this one, we have essentially
472 // created new exit paths from it. That means we need to form LCSSA PHI
473 // nodes for values used in the no-longer-nested loop.
474 formLCSSA(*OldContainingL, DT, &LI, SE);
475
476 // We shouldn't need to form dedicated exits because the exit introduced
477 // here is the (just split by unswitching) preheader. However, after trivial
478 // unswitching it is possible to get new non-dedicated exits out of parent
479 // loop so let's conservatively form dedicated exit blocks and figure out
480 // if we can optimize later.
481 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
482 /*PreserveLCSSA*/ true);
483 }
484}
485
486// Return the top-most loop containing ExitBB and having ExitBB as exiting block
487// or the loop containing ExitBB, if there is no parent loop containing ExitBB
488// as exiting block.
490 const LoopInfo &LI) {
491 Loop *TopMost = LI.getLoopFor(ExitBB);
492 Loop *Current = TopMost;
493 while (Current) {
494 if (Current->isLoopExiting(ExitBB))
495 TopMost = Current;
496 Current = Current->getParentLoop();
497 }
498 return TopMost;
499}
500
501/// Unswitch a trivial branch if the condition is loop invariant.
502///
503/// This routine should only be called when loop code leading to the branch has
504/// been validated as trivial (no side effects). This routine checks if the
505/// condition is invariant and one of the successors is a loop exit. This
506/// allows us to unswitch without duplicating the loop, making it trivial.
507///
508/// If this routine fails to unswitch the branch it returns false.
509///
510/// If the branch can be unswitched, this routine splits the preheader and
511/// hoists the branch above that split. Preserves loop simplified form
512/// (splitting the exit block as necessary). It simplifies the branch within
513/// the loop to an unconditional branch but doesn't remove it entirely. Further
514/// cleanup can be done with some simplifycfg like pass.
515///
516/// If `SE` is not null, it will be updated based on the potential loop SCEVs
517/// invalidated by this.
519 LoopInfo &LI, ScalarEvolution *SE,
520 MemorySSAUpdater *MSSAU) {
521 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
522 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
523
524 // The loop invariant values that we want to unswitch.
525 TinyPtrVector<Value *> Invariants;
526
527 // When true, we're fully unswitching the branch rather than just unswitching
528 // some input conditions to the branch.
529 bool FullUnswitch = false;
530
532 if (L.isLoopInvariant(Cond)) {
533 Invariants.push_back(Cond);
534 FullUnswitch = true;
535 } else {
536 if (auto *CondInst = dyn_cast<Instruction>(Cond))
537 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
538 if (Invariants.empty()) {
539 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
540 return false;
541 }
542 }
543
544 // Check that one of the branch's successors exits, and which one.
545 bool ExitDirection = true;
546 int LoopExitSuccIdx = 0;
547 auto *LoopExitBB = BI.getSuccessor(0);
548 if (L.contains(LoopExitBB)) {
549 ExitDirection = false;
550 LoopExitSuccIdx = 1;
551 LoopExitBB = BI.getSuccessor(1);
552 if (L.contains(LoopExitBB)) {
553 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
554 return false;
555 }
556 }
557 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
558 auto *ParentBB = BI.getParent();
559 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
560 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
561 return false;
562 }
563
564 // When unswitching only part of the branch's condition, we need the exit
565 // block to be reached directly from the partially unswitched input. This can
566 // be done when the exit block is along the true edge and the branch condition
567 // is a graph of `or` operations, or the exit block is along the false edge
568 // and the condition is a graph of `and` operations.
569 if (!FullUnswitch) {
570 if (ExitDirection ? !match(Cond, m_LogicalOr())
571 : !match(Cond, m_LogicalAnd())) {
572 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
573 "non-full unswitch!\n");
574 return false;
575 }
576 }
577
578 LLVM_DEBUG({
579 dbgs() << " unswitching trivial invariant conditions for: " << BI
580 << "\n";
581 for (Value *Invariant : Invariants) {
582 dbgs() << " " << *Invariant << " == true";
583 if (Invariant != Invariants.back())
584 dbgs() << " ||";
585 dbgs() << "\n";
586 }
587 });
588
589 // If we have scalar evolutions, we need to invalidate them including this
590 // loop, the loop containing the exit block and the topmost parent loop
591 // exiting via LoopExitBB.
592 if (SE) {
593 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
594 SE->forgetLoop(ExitL);
595 else
596 // Forget the entire nest as this exits the entire nest.
597 SE->forgetTopmostLoop(&L);
599 }
600
601 if (MSSAU && VerifyMemorySSA)
602 MSSAU->getMemorySSA()->verifyMemorySSA();
603
604 // Split the preheader, so that we know that there is a safe place to insert
605 // the conditional branch. We will change the preheader to have a conditional
606 // branch on LoopCond.
607 BasicBlock *OldPH = L.getLoopPreheader();
608 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
609
610 // Now that we have a place to insert the conditional branch, create a place
611 // to branch to: this is the exit block out of the loop that we are
612 // unswitching. We need to split this if there are other loop predecessors.
613 // Because the loop is in simplified form, *any* other predecessor is enough.
614 BasicBlock *UnswitchedBB;
615 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
616 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
617 "A branch's parent isn't a predecessor!");
618 UnswitchedBB = LoopExitBB;
619 } else {
620 UnswitchedBB =
621 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
622 }
623
624 if (MSSAU && VerifyMemorySSA)
625 MSSAU->getMemorySSA()->verifyMemorySSA();
626
627 // Actually move the invariant uses into the unswitched position. If possible,
628 // we do this by moving the instructions, but when doing partial unswitching
629 // we do it by building a new merge of the values in the unswitched position.
630 OldPH->getTerminator()->eraseFromParent();
631 if (FullUnswitch) {
632 // If fully unswitching, we can use the existing branch instruction.
633 // Splice it into the old PH to gate reaching the new preheader and re-point
634 // its successors.
635 BI.moveBefore(*OldPH, OldPH->end());
636 BI.setCondition(Cond);
637 if (MSSAU) {
638 // Temporarily clone the terminator, to make MSSA update cheaper by
639 // separating "insert edge" updates from "remove edge" ones.
640 BI.clone()->insertInto(ParentBB, ParentBB->end());
641 } else {
642 // Create a new unconditional branch that will continue the loop as a new
643 // terminator.
644 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
645 NewBI->setDebugLoc(BI.getDebugLoc());
646 }
647 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
648 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
649 } else {
650 // Only unswitching a subset of inputs to the condition, so we will need to
651 // build a new branch that merges the invariant inputs.
652 if (ExitDirection)
654 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
655 "condition!");
656 else
658 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
659 " condition!");
661 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
662 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
663 }
664
665 // Update the dominator tree with the added edge.
666 DT.insertEdge(OldPH, UnswitchedBB);
667
668 // After the dominator tree was updated with the added edge, update MemorySSA
669 // if available.
670 if (MSSAU) {
672 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
673 MSSAU->applyInsertUpdates(Updates, DT);
674 }
675
676 // Finish updating dominator tree and memory ssa for full unswitch.
677 if (FullUnswitch) {
678 if (MSSAU) {
679 Instruction *Term = ParentBB->getTerminator();
680 // Remove the cloned branch instruction and create unconditional branch
681 // now.
682 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
683 NewBI->setDebugLoc(Term->getDebugLoc());
684 Term->eraseFromParent();
685 MSSAU->removeEdge(ParentBB, LoopExitBB);
686 }
687 DT.deleteEdge(ParentBB, LoopExitBB);
688 }
689
690 if (MSSAU && VerifyMemorySSA)
691 MSSAU->getMemorySSA()->verifyMemorySSA();
692
693 // Rewrite the relevant PHI nodes.
694 if (UnswitchedBB == LoopExitBB)
695 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
696 else
697 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
698 *ParentBB, *OldPH, FullUnswitch);
699
700 // The constant we can replace all of our invariants with inside the loop
701 // body. If any of the invariants have a value other than this the loop won't
702 // be entered.
703 ConstantInt *Replacement = ExitDirection
706
707 // Since this is an i1 condition we can also trivially replace uses of it
708 // within the loop with a constant.
709 for (Value *Invariant : Invariants)
710 replaceLoopInvariantUses(L, Invariant, *Replacement);
711
712 // If this was full unswitching, we may have changed the nesting relationship
713 // for this loop so hoist it to its correct parent if needed.
714 if (FullUnswitch)
715 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
716
717 if (MSSAU && VerifyMemorySSA)
718 MSSAU->getMemorySSA()->verifyMemorySSA();
719
720 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
721 ++NumTrivial;
722 ++NumBranches;
723 return true;
724}
725
726/// Unswitch a trivial switch if the condition is loop invariant.
727///
728/// This routine should only be called when loop code leading to the switch has
729/// been validated as trivial (no side effects). This routine checks if the
730/// condition is invariant and that at least one of the successors is a loop
731/// exit. This allows us to unswitch without duplicating the loop, making it
732/// trivial.
733///
734/// If this routine fails to unswitch the switch it returns false.
735///
736/// If the switch can be unswitched, this routine splits the preheader and
737/// copies the switch above that split. If the default case is one of the
738/// exiting cases, it copies the non-exiting cases and points them at the new
739/// preheader. If the default case is not exiting, it copies the exiting cases
740/// and points the default at the preheader. It preserves loop simplified form
741/// (splitting the exit blocks as necessary). It simplifies the switch within
742/// the loop by removing now-dead cases. If the default case is one of those
743/// unswitched, it replaces its destination with a new basic block containing
744/// only unreachable. Such basic blocks, while technically loop exits, are not
745/// considered for unswitching so this is a stable transform and the same
746/// switch will not be revisited. If after unswitching there is only a single
747/// in-loop successor, the switch is further simplified to an unconditional
748/// branch. Still more cleanup can be done with some simplifycfg like pass.
749///
750/// If `SE` is not null, it will be updated based on the potential loop SCEVs
751/// invalidated by this.
753 LoopInfo &LI, ScalarEvolution *SE,
754 MemorySSAUpdater *MSSAU) {
755 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
756 Value *LoopCond = SI.getCondition();
757
758 // If this isn't switching on an invariant condition, we can't unswitch it.
759 if (!L.isLoopInvariant(LoopCond))
760 return false;
761
762 auto *ParentBB = SI.getParent();
763
764 // The same check must be used both for the default and the exit cases. We
765 // should never leave edges from the switch instruction to a basic block that
766 // we are unswitching, hence the condition used to determine the default case
767 // needs to also be used to populate ExitCaseIndices, which is then used to
768 // remove cases from the switch.
769 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
770 // BBToCheck is not an exit block if it is inside loop L.
771 if (L.contains(&BBToCheck))
772 return false;
773 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
774 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
775 return false;
776 // We do not unswitch a block that only has an unreachable statement, as
777 // it's possible this is a previously unswitched block. Only unswitch if
778 // either the terminator is not unreachable, or, if it is, it's not the only
779 // instruction in the block.
780 auto *TI = BBToCheck.getTerminator();
781 bool isUnreachable = isa<UnreachableInst>(TI);
782 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
783 };
784
785 SmallVector<int, 4> ExitCaseIndices;
786 for (auto Case : SI.cases())
787 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
788 ExitCaseIndices.push_back(Case.getCaseIndex());
789 BasicBlock *DefaultExitBB = nullptr;
792 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
793 DefaultExitBB = SI.getDefaultDest();
794 } else if (ExitCaseIndices.empty())
795 return false;
796
797 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
798
799 if (MSSAU && VerifyMemorySSA)
800 MSSAU->getMemorySSA()->verifyMemorySSA();
801
802 // We may need to invalidate SCEVs for the outermost loop reached by any of
803 // the exits.
804 Loop *OuterL = &L;
805
806 if (DefaultExitBB) {
807 // Check the loop containing this exit.
808 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
809 if (!ExitL || ExitL->contains(OuterL))
810 OuterL = ExitL;
811 }
812 for (unsigned Index : ExitCaseIndices) {
813 auto CaseI = SI.case_begin() + Index;
814 // Compute the outer loop from this exit.
815 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
816 if (!ExitL || ExitL->contains(OuterL))
817 OuterL = ExitL;
818 }
819
820 if (SE) {
821 if (OuterL)
822 SE->forgetLoop(OuterL);
823 else
824 SE->forgetTopmostLoop(&L);
825 }
826
827 if (DefaultExitBB) {
828 // Clear out the default destination temporarily to allow accurate
829 // predecessor lists to be examined below.
830 SI.setDefaultDest(nullptr);
831 }
832
833 // Store the exit cases into a separate data structure and remove them from
834 // the switch.
835 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
837 4> ExitCases;
838 ExitCases.reserve(ExitCaseIndices.size());
840 // We walk the case indices backwards so that we remove the last case first
841 // and don't disrupt the earlier indices.
842 for (unsigned Index : reverse(ExitCaseIndices)) {
843 auto CaseI = SI.case_begin() + Index;
844 // Save the value of this case.
845 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
846 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
847 // Delete the unswitched cases.
848 SIW.removeCase(CaseI);
849 }
850
851 // Check if after this all of the remaining cases point at the same
852 // successor.
853 BasicBlock *CommonSuccBB = nullptr;
854 if (SI.getNumCases() > 0 &&
855 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
856 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
857 }))
858 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
859 if (!DefaultExitBB) {
860 // If we're not unswitching the default, we need it to match any cases to
861 // have a common successor or if we have no cases it is the common
862 // successor.
863 if (SI.getNumCases() == 0)
864 CommonSuccBB = SI.getDefaultDest();
865 else if (SI.getDefaultDest() != CommonSuccBB)
866 CommonSuccBB = nullptr;
867 }
868
869 // Split the preheader, so that we know that there is a safe place to insert
870 // the switch.
871 BasicBlock *OldPH = L.getLoopPreheader();
872 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
873 OldPH->getTerminator()->eraseFromParent();
874
875 // Now add the unswitched switch. This new switch instruction inherits the
876 // debug location of the old switch, because it semantically replace the old
877 // one.
878 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
879 NewSI->setDebugLoc(SIW->getDebugLoc());
880 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
881
882 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
883 // First, we split any exit blocks with remaining in-loop predecessors. Then
884 // we update the PHIs in one of two ways depending on if there was a split.
885 // We walk in reverse so that we split in the same order as the cases
886 // appeared. This is purely for convenience of reading the resulting IR, but
887 // it doesn't cost anything really.
888 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
890 // Handle the default exit if necessary.
891 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
892 // ranges aren't quite powerful enough yet.
893 if (DefaultExitBB) {
894 if (pred_empty(DefaultExitBB)) {
895 UnswitchedExitBBs.insert(DefaultExitBB);
896 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
897 } else {
898 auto *SplitBB =
899 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
900 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
901 *ParentBB, *OldPH,
902 /*FullUnswitch*/ true);
903 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
904 }
905 }
906 // Note that we must use a reference in the for loop so that we update the
907 // container.
908 for (auto &ExitCase : reverse(ExitCases)) {
909 // Grab a reference to the exit block in the pair so that we can update it.
910 BasicBlock *ExitBB = std::get<1>(ExitCase);
911
912 // If this case is the last edge into the exit block, we can simply reuse it
913 // as it will no longer be a loop exit. No mapping necessary.
914 if (pred_empty(ExitBB)) {
915 // Only rewrite once.
916 if (UnswitchedExitBBs.insert(ExitBB).second)
917 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
918 continue;
919 }
920
921 // Otherwise we need to split the exit block so that we retain an exit
922 // block from the loop and a target for the unswitched condition.
923 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
924 if (!SplitExitBB) {
925 // If this is the first time we see this, do the split and remember it.
926 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
927 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
928 *ParentBB, *OldPH,
929 /*FullUnswitch*/ true);
930 }
931 // Update the case pair to point to the split block.
932 std::get<1>(ExitCase) = SplitExitBB;
933 }
934
935 // Now add the unswitched cases. We do this in reverse order as we built them
936 // in reverse order.
937 for (auto &ExitCase : reverse(ExitCases)) {
938 ConstantInt *CaseVal = std::get<0>(ExitCase);
939 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
940
941 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
942 }
943
944 // If the default was unswitched, re-point it and add explicit cases for
945 // entering the loop.
946 if (DefaultExitBB) {
947 NewSIW->setDefaultDest(DefaultExitBB);
948 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
949
950 // We removed all the exit cases, so we just copy the cases to the
951 // unswitched switch.
952 for (const auto &Case : SI.cases())
953 NewSIW.addCase(Case.getCaseValue(), NewPH,
955 } else if (DefaultCaseWeight) {
956 // We have to set branch weight of the default case.
957 uint64_t SW = *DefaultCaseWeight;
958 for (const auto &Case : SI.cases()) {
959 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
960 assert(W &&
961 "case weight must be defined as default case weight is defined");
962 SW += *W;
963 }
964 NewSIW.setSuccessorWeight(0, SW);
965 }
966
967 // If we ended up with a common successor for every path through the switch
968 // after unswitching, rewrite it to an unconditional branch to make it easy
969 // to recognize. Otherwise we potentially have to recognize the default case
970 // pointing at unreachable and other complexity.
971 if (CommonSuccBB) {
972 BasicBlock *BB = SI.getParent();
973 // We may have had multiple edges to this common successor block, so remove
974 // them as predecessors. We skip the first one, either the default or the
975 // actual first case.
976 bool SkippedFirst = DefaultExitBB == nullptr;
977 for (auto Case : SI.cases()) {
978 assert(Case.getCaseSuccessor() == CommonSuccBB &&
979 "Non-common successor!");
980 (void)Case;
981 if (!SkippedFirst) {
982 SkippedFirst = true;
983 continue;
984 }
985 CommonSuccBB->removePredecessor(BB,
986 /*KeepOneInputPHIs*/ true);
987 }
988 // Now nuke the switch and replace it with a direct branch.
989 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
990 NewBI->setDebugLoc(SIW->getDebugLoc());
991 SIW.eraseFromParent();
992 } else if (DefaultExitBB) {
993 assert(SI.getNumCases() > 0 &&
994 "If we had no cases we'd have a common successor!");
995 // Move the last case to the default successor. This is valid as if the
996 // default got unswitched it cannot be reached. This has the advantage of
997 // being simple and keeping the number of edges from this switch to
998 // successors the same, and avoiding any PHI update complexity.
999 auto LastCaseI = std::prev(SI.case_end());
1000
1001 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1003 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1004 SIW.removeCase(LastCaseI);
1005 }
1006
1007 // Walk the unswitched exit blocks and the unswitched split blocks and update
1008 // the dominator tree based on the CFG edits. While we are walking unordered
1009 // containers here, the API for applyUpdates takes an unordered list of
1010 // updates and requires them to not contain duplicates.
1012 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1013 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1014 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1015 }
1016 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1017 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1018 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1019 }
1020
1021 if (MSSAU) {
1022 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1023 if (VerifyMemorySSA)
1024 MSSAU->getMemorySSA()->verifyMemorySSA();
1025 } else {
1026 DT.applyUpdates(DTUpdates);
1027 }
1028
1029 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1030
1031 // We may have changed the nesting relationship for this loop so hoist it to
1032 // its correct parent if needed.
1033 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1034
1035 if (MSSAU && VerifyMemorySSA)
1036 MSSAU->getMemorySSA()->verifyMemorySSA();
1037
1038 ++NumTrivial;
1039 ++NumSwitches;
1040 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1041 return true;
1042}
1043
1044/// This routine scans the loop to find a branch or switch which occurs before
1045/// any side effects occur. These can potentially be unswitched without
1046/// duplicating the loop. If a branch or switch is successfully unswitched the
1047/// scanning continues to see if subsequent branches or switches have become
1048/// trivial. Once all trivial candidates have been unswitched, this routine
1049/// returns.
1050///
1051/// The return value indicates whether anything was unswitched (and therefore
1052/// changed).
1053///
1054/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1055/// invalidated by this.
1057 LoopInfo &LI, ScalarEvolution *SE,
1058 MemorySSAUpdater *MSSAU) {
1059 bool Changed = false;
1060
1061 // If loop header has only one reachable successor we should keep looking for
1062 // trivial condition candidates in the successor as well. An alternative is
1063 // to constant fold conditions and merge successors into loop header (then we
1064 // only need to check header's terminator). The reason for not doing this in
1065 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1066 // invariants. Folding dead branches could either eliminate the current loop
1067 // or make other loops unreachable. LCSSA form might also not be preserved
1068 // after deleting branches. The following code keeps traversing loop header's
1069 // successors until it finds the trivial condition candidate (condition that
1070 // is not a constant). Since unswitching generates branches with constant
1071 // conditions, this scenario could be very common in practice.
1072 BasicBlock *CurrentBB = L.getHeader();
1074 Visited.insert(CurrentBB);
1075 do {
1076 // Check if there are any side-effecting instructions (e.g. stores, calls,
1077 // volatile loads) in the part of the loop that the code *would* execute
1078 // without unswitching.
1079 if (MSSAU) // Possible early exit with MSSA
1080 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1081 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1082 return Changed;
1083 if (llvm::any_of(*CurrentBB,
1084 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1085 return Changed;
1086
1087 Instruction *CurrentTerm = CurrentBB->getTerminator();
1088
1089 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1090 // Don't bother trying to unswitch past a switch with a constant
1091 // condition. This should be removed prior to running this pass by
1092 // simplifycfg.
1093 if (isa<Constant>(SI->getCondition()))
1094 return Changed;
1095
1096 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1097 // Couldn't unswitch this one so we're done.
1098 return Changed;
1099
1100 // Mark that we managed to unswitch something.
1101 Changed = true;
1102
1103 // If unswitching turned the terminator into an unconditional branch then
1104 // we can continue. The unswitching logic specifically works to fold any
1105 // cases it can into an unconditional branch to make it easier to
1106 // recognize here.
1107 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1108 if (!BI || BI->isConditional())
1109 return Changed;
1110
1111 CurrentBB = BI->getSuccessor(0);
1112 continue;
1113 }
1114
1115 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1116 if (!BI)
1117 // We do not understand other terminator instructions.
1118 return Changed;
1119
1120 // Don't bother trying to unswitch past an unconditional branch or a branch
1121 // with a constant value. These should be removed by simplifycfg prior to
1122 // running this pass.
1123 if (!BI->isConditional() ||
1124 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1125 return Changed;
1126
1127 // Found a trivial condition candidate: non-foldable conditional branch. If
1128 // we fail to unswitch this, we can't do anything else that is trivial.
1129 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1130 return Changed;
1131
1132 // Mark that we managed to unswitch something.
1133 Changed = true;
1134
1135 // If we only unswitched some of the conditions feeding the branch, we won't
1136 // have collapsed it to a single successor.
1137 BI = cast<BranchInst>(CurrentBB->getTerminator());
1138 if (BI->isConditional())
1139 return Changed;
1140
1141 // Follow the newly unconditional branch into its successor.
1142 CurrentBB = BI->getSuccessor(0);
1143
1144 // When continuing, if we exit the loop or reach a previous visited block,
1145 // then we can not reach any trivial condition candidates (unfoldable
1146 // branch instructions or switch instructions) and no unswitch can happen.
1147 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1148
1149 return Changed;
1150}
1151
1152/// Build the cloned blocks for an unswitched copy of the given loop.
1153///
1154/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1155/// after the split block (`SplitBB`) that will be used to select between the
1156/// cloned and original loop.
1157///
1158/// This routine handles cloning all of the necessary loop blocks and exit
1159/// blocks including rewriting their instructions and the relevant PHI nodes.
1160/// Any loop blocks or exit blocks which are dominated by a different successor
1161/// than the one for this clone of the loop blocks can be trivially skipped. We
1162/// use the `DominatingSucc` map to determine whether a block satisfies that
1163/// property with a simple map lookup.
1164///
1165/// It also correctly creates the unconditional branch in the cloned
1166/// unswitched parent block to only point at the unswitched successor.
1167///
1168/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1169/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1170/// the cloned blocks (and their loops) are left without full `LoopInfo`
1171/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1172/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1173/// instead the caller must recompute an accurate DT. It *does* correctly
1174/// update the `AssumptionCache` provided in `AC`.
1176 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1177 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1178 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1180 ValueToValueMapTy &VMap,
1182 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1183 ScalarEvolution *SE) {
1185 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1186
1187 // We will need to clone a bunch of blocks, wrap up the clone operation in
1188 // a helper.
1189 auto CloneBlock = [&](BasicBlock *OldBB) {
1190 // Clone the basic block and insert it before the new preheader.
1191 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1192 NewBB->moveBefore(LoopPH);
1193
1194 // Record this block and the mapping.
1195 NewBlocks.push_back(NewBB);
1196 VMap[OldBB] = NewBB;
1197
1198 return NewBB;
1199 };
1200
1201 // We skip cloning blocks when they have a dominating succ that is not the
1202 // succ we are cloning for.
1203 auto SkipBlock = [&](BasicBlock *BB) {
1204 auto It = DominatingSucc.find(BB);
1205 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1206 };
1207
1208 // First, clone the preheader.
1209 auto *ClonedPH = CloneBlock(LoopPH);
1210
1211 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1212 for (auto *LoopBB : L.blocks())
1213 if (!SkipBlock(LoopBB))
1214 CloneBlock(LoopBB);
1215
1216 // Split all the loop exit edges so that when we clone the exit blocks, if
1217 // any of the exit blocks are *also* a preheader for some other loop, we
1218 // don't create multiple predecessors entering the loop header.
1219 for (auto *ExitBB : ExitBlocks) {
1220 if (SkipBlock(ExitBB))
1221 continue;
1222
1223 // When we are going to clone an exit, we don't need to clone all the
1224 // instructions in the exit block and we want to ensure we have an easy
1225 // place to merge the CFG, so split the exit first. This is always safe to
1226 // do because there cannot be any non-loop predecessors of a loop exit in
1227 // loop simplified form.
1228 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1229
1230 // Rearrange the names to make it easier to write test cases by having the
1231 // exit block carry the suffix rather than the merge block carrying the
1232 // suffix.
1233 MergeBB->takeName(ExitBB);
1234 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1235
1236 // Now clone the original exit block.
1237 auto *ClonedExitBB = CloneBlock(ExitBB);
1238 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1239 "Exit block should have been split to have one successor!");
1240 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1241 "Cloned exit block has the wrong successor!");
1242
1243 // Remap any cloned instructions and create a merge phi node for them.
1244 for (auto ZippedInsts : llvm::zip_first(
1245 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1246 llvm::make_range(ClonedExitBB->begin(),
1247 std::prev(ClonedExitBB->end())))) {
1248 Instruction &I = std::get<0>(ZippedInsts);
1249 Instruction &ClonedI = std::get<1>(ZippedInsts);
1250
1251 // The only instructions in the exit block should be PHI nodes and
1252 // potentially a landing pad.
1253 assert(
1255 "Bad instruction in exit block!");
1256 // We should have a value map between the instruction and its clone.
1257 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1258
1259 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1260 if (SE)
1261 if (auto *PN = dyn_cast<PHINode>(&I))
1263
1264 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1265
1266 auto *MergePN =
1267 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1268 MergePN->insertBefore(InsertPt);
1269 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1270 I.replaceAllUsesWith(MergePN);
1271 MergePN->addIncoming(&I, ExitBB);
1272 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1273 }
1274 }
1275
1276 // Rewrite the instructions in the cloned blocks to refer to the instructions
1277 // in the cloned blocks. We have to do this as a second pass so that we have
1278 // everything available. Also, we have inserted new instructions which may
1279 // include assume intrinsics, so we update the assumption cache while
1280 // processing this.
1281 Module *M = ClonedPH->getParent()->getParent();
1282 for (auto *ClonedBB : NewBlocks)
1283 for (Instruction &I : *ClonedBB) {
1284 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1286 RemapInstruction(&I, VMap,
1288 if (auto *II = dyn_cast<AssumeInst>(&I))
1290 }
1291
1292 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1293 // have spurious incoming values.
1294 for (auto *LoopBB : L.blocks())
1295 if (SkipBlock(LoopBB))
1296 for (auto *SuccBB : successors(LoopBB))
1297 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1298 for (PHINode &PN : ClonedSuccBB->phis())
1299 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1300
1301 // Remove the cloned parent as a predecessor of any successor we ended up
1302 // cloning other than the unswitched one.
1303 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1304 for (auto *SuccBB : successors(ParentBB)) {
1305 if (SuccBB == UnswitchedSuccBB)
1306 continue;
1307
1308 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1309 if (!ClonedSuccBB)
1310 continue;
1311
1312 ClonedSuccBB->removePredecessor(ClonedParentBB,
1313 /*KeepOneInputPHIs*/ true);
1314 }
1315
1316 // Replace the cloned branch with an unconditional branch to the cloned
1317 // unswitched successor.
1318 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1319 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1320 // Trivial Simplification. If Terminator is a conditional branch and
1321 // condition becomes dead - erase it.
1322 Value *ClonedConditionToErase = nullptr;
1323 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1324 ClonedConditionToErase = BI->getCondition();
1325 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1326 ClonedConditionToErase = SI->getCondition();
1327
1328 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1329 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1330 ClonedTerminator->eraseFromParent();
1331
1332 if (ClonedConditionToErase)
1333 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1334 MSSAU);
1335
1336 // If there are duplicate entries in the PHI nodes because of multiple edges
1337 // to the unswitched successor, we need to nuke all but one as we replaced it
1338 // with a direct branch.
1339 for (PHINode &PN : ClonedSuccBB->phis()) {
1340 bool Found = false;
1341 // Loop over the incoming operands backwards so we can easily delete as we
1342 // go without invalidating the index.
1343 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1344 if (PN.getIncomingBlock(i) != ClonedParentBB)
1345 continue;
1346 if (!Found) {
1347 Found = true;
1348 continue;
1349 }
1350 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1351 }
1352 }
1353
1354 // Record the domtree updates for the new blocks.
1356 for (auto *ClonedBB : NewBlocks) {
1357 for (auto *SuccBB : successors(ClonedBB))
1358 if (SuccSet.insert(SuccBB).second)
1359 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1360 SuccSet.clear();
1361 }
1362
1363 return ClonedPH;
1364}
1365
1366/// Recursively clone the specified loop and all of its children.
1367///
1368/// The target parent loop for the clone should be provided, or can be null if
1369/// the clone is a top-level loop. While cloning, all the blocks are mapped
1370/// with the provided value map. The entire original loop must be present in
1371/// the value map. The cloned loop is returned.
1372static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1373 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1374 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1375 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1376 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1377 for (auto *BB : OrigL.blocks()) {
1378 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1379 ClonedL.addBlockEntry(ClonedBB);
1380 if (LI.getLoopFor(BB) == &OrigL)
1381 LI.changeLoopFor(ClonedBB, &ClonedL);
1382 }
1383 };
1384
1385 // We specially handle the first loop because it may get cloned into
1386 // a different parent and because we most commonly are cloning leaf loops.
1387 Loop *ClonedRootL = LI.AllocateLoop();
1388 if (RootParentL)
1389 RootParentL->addChildLoop(ClonedRootL);
1390 else
1391 LI.addTopLevelLoop(ClonedRootL);
1392 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1393
1394 if (OrigRootL.isInnermost())
1395 return ClonedRootL;
1396
1397 // If we have a nest, we can quickly clone the entire loop nest using an
1398 // iterative approach because it is a tree. We keep the cloned parent in the
1399 // data structure to avoid repeatedly querying through a map to find it.
1400 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1401 // Build up the loops to clone in reverse order as we'll clone them from the
1402 // back.
1403 for (Loop *ChildL : llvm::reverse(OrigRootL))
1404 LoopsToClone.push_back({ClonedRootL, ChildL});
1405 do {
1406 Loop *ClonedParentL, *L;
1407 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1408 Loop *ClonedL = LI.AllocateLoop();
1409 ClonedParentL->addChildLoop(ClonedL);
1410 AddClonedBlocksToLoop(*L, *ClonedL);
1411 for (Loop *ChildL : llvm::reverse(*L))
1412 LoopsToClone.push_back({ClonedL, ChildL});
1413 } while (!LoopsToClone.empty());
1414
1415 return ClonedRootL;
1416}
1417
1418/// Build the cloned loops of an original loop from unswitching.
1419///
1420/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1421/// operation. We need to re-verify that there even is a loop (as the backedge
1422/// may not have been cloned), and even if there are remaining backedges the
1423/// backedge set may be different. However, we know that each child loop is
1424/// undisturbed, we only need to find where to place each child loop within
1425/// either any parent loop or within a cloned version of the original loop.
1426///
1427/// Because child loops may end up cloned outside of any cloned version of the
1428/// original loop, multiple cloned sibling loops may be created. All of them
1429/// are returned so that the newly introduced loop nest roots can be
1430/// identified.
1431static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1432 const ValueToValueMapTy &VMap, LoopInfo &LI,
1433 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1434 Loop *ClonedL = nullptr;
1435
1436 auto *OrigPH = OrigL.getLoopPreheader();
1437 auto *OrigHeader = OrigL.getHeader();
1438
1439 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1440 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1441
1442 // We need to know the loops of the cloned exit blocks to even compute the
1443 // accurate parent loop. If we only clone exits to some parent of the
1444 // original parent, we want to clone into that outer loop. We also keep track
1445 // of the loops that our cloned exit blocks participate in.
1446 Loop *ParentL = nullptr;
1447 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1449 ClonedExitsInLoops.reserve(ExitBlocks.size());
1450 for (auto *ExitBB : ExitBlocks)
1451 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1452 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1453 ExitLoopMap[ClonedExitBB] = ExitL;
1454 ClonedExitsInLoops.push_back(ClonedExitBB);
1455 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1456 ParentL = ExitL;
1457 }
1458 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1459 ParentL->contains(OrigL.getParentLoop())) &&
1460 "The computed parent loop should always contain (or be) the parent of "
1461 "the original loop.");
1462
1463 // We build the set of blocks dominated by the cloned header from the set of
1464 // cloned blocks out of the original loop. While not all of these will
1465 // necessarily be in the cloned loop, it is enough to establish that they
1466 // aren't in unreachable cycles, etc.
1467 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1468 for (auto *BB : OrigL.blocks())
1469 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1470 ClonedLoopBlocks.insert(ClonedBB);
1471
1472 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1473 // skipped cloning some region of this loop which can in turn skip some of
1474 // the backedges so we have to rebuild the blocks in the loop based on the
1475 // backedges that remain after cloning.
1477 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1478 for (auto *Pred : predecessors(ClonedHeader)) {
1479 // The only possible non-loop header predecessor is the preheader because
1480 // we know we cloned the loop in simplified form.
1481 if (Pred == ClonedPH)
1482 continue;
1483
1484 // Because the loop was in simplified form, the only non-loop predecessor
1485 // should be the preheader.
1486 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1487 "header other than the preheader "
1488 "that is not part of the loop!");
1489
1490 // Insert this block into the loop set and on the first visit (and if it
1491 // isn't the header we're currently walking) put it into the worklist to
1492 // recurse through.
1493 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1494 Worklist.push_back(Pred);
1495 }
1496
1497 // If we had any backedges then there *is* a cloned loop. Put the header into
1498 // the loop set and then walk the worklist backwards to find all the blocks
1499 // that remain within the loop after cloning.
1500 if (!BlocksInClonedLoop.empty()) {
1501 BlocksInClonedLoop.insert(ClonedHeader);
1502
1503 while (!Worklist.empty()) {
1504 BasicBlock *BB = Worklist.pop_back_val();
1505 assert(BlocksInClonedLoop.count(BB) &&
1506 "Didn't put block into the loop set!");
1507
1508 // Insert any predecessors that are in the possible set into the cloned
1509 // set, and if the insert is successful, add them to the worklist. Note
1510 // that we filter on the blocks that are definitely reachable via the
1511 // backedge to the loop header so we may prune out dead code within the
1512 // cloned loop.
1513 for (auto *Pred : predecessors(BB))
1514 if (ClonedLoopBlocks.count(Pred) &&
1515 BlocksInClonedLoop.insert(Pred).second)
1516 Worklist.push_back(Pred);
1517 }
1518
1519 ClonedL = LI.AllocateLoop();
1520 if (ParentL) {
1521 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1522 ParentL->addChildLoop(ClonedL);
1523 } else {
1524 LI.addTopLevelLoop(ClonedL);
1525 }
1526 NonChildClonedLoops.push_back(ClonedL);
1527
1528 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1529 // We don't want to just add the cloned loop blocks based on how we
1530 // discovered them. The original order of blocks was carefully built in
1531 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1532 // that logic, we just re-walk the original blocks (and those of the child
1533 // loops) and filter them as we add them into the cloned loop.
1534 for (auto *BB : OrigL.blocks()) {
1535 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1536 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1537 continue;
1538
1539 // Directly add the blocks that are only in this loop.
1540 if (LI.getLoopFor(BB) == &OrigL) {
1541 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1542 continue;
1543 }
1544
1545 // We want to manually add it to this loop and parents.
1546 // Registering it with LoopInfo will happen when we clone the top
1547 // loop for this block.
1548 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1549 PL->addBlockEntry(ClonedBB);
1550 }
1551
1552 // Now add each child loop whose header remains within the cloned loop. All
1553 // of the blocks within the loop must satisfy the same constraints as the
1554 // header so once we pass the header checks we can just clone the entire
1555 // child loop nest.
1556 for (Loop *ChildL : OrigL) {
1557 auto *ClonedChildHeader =
1558 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1559 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1560 continue;
1561
1562#ifndef NDEBUG
1563 // We should never have a cloned child loop header but fail to have
1564 // all of the blocks for that child loop.
1565 for (auto *ChildLoopBB : ChildL->blocks())
1566 assert(BlocksInClonedLoop.count(
1567 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1568 "Child cloned loop has a header within the cloned outer "
1569 "loop but not all of its blocks!");
1570#endif
1571
1572 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1573 }
1574 }
1575
1576 // Now that we've handled all the components of the original loop that were
1577 // cloned into a new loop, we still need to handle anything from the original
1578 // loop that wasn't in a cloned loop.
1579
1580 // Figure out what blocks are left to place within any loop nest containing
1581 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1582 // them.
1583 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1584 if (BlocksInClonedLoop.empty())
1585 UnloopedBlockSet.insert(ClonedPH);
1586 for (auto *ClonedBB : ClonedLoopBlocks)
1587 if (!BlocksInClonedLoop.count(ClonedBB))
1588 UnloopedBlockSet.insert(ClonedBB);
1589
1590 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1591 // backwards across these to process them inside out. The order shouldn't
1592 // matter as we're just trying to build up the map from inside-out; we use
1593 // the map in a more stably ordered way below.
1594 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1595 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1596 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1597 ExitLoopMap.lookup(RHS)->getLoopDepth();
1598 });
1599
1600 // Populate the existing ExitLoopMap with everything reachable from each
1601 // exit, starting from the inner most exit.
1602 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1603 assert(Worklist.empty() && "Didn't clear worklist!");
1604
1605 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1606 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1607
1608 // Walk the CFG back until we hit the cloned PH adding everything reachable
1609 // and in the unlooped set to this exit block's loop.
1610 Worklist.push_back(ExitBB);
1611 do {
1612 BasicBlock *BB = Worklist.pop_back_val();
1613 // We can stop recursing at the cloned preheader (if we get there).
1614 if (BB == ClonedPH)
1615 continue;
1616
1617 for (BasicBlock *PredBB : predecessors(BB)) {
1618 // If this pred has already been moved to our set or is part of some
1619 // (inner) loop, no update needed.
1620 if (!UnloopedBlockSet.erase(PredBB)) {
1621 assert(
1622 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1623 "Predecessor not mapped to a loop!");
1624 continue;
1625 }
1626
1627 // We just insert into the loop set here. We'll add these blocks to the
1628 // exit loop after we build up the set in an order that doesn't rely on
1629 // predecessor order (which in turn relies on use list order).
1630 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1631 (void)Inserted;
1632 assert(Inserted && "Should only visit an unlooped block once!");
1633
1634 // And recurse through to its predecessors.
1635 Worklist.push_back(PredBB);
1636 }
1637 } while (!Worklist.empty());
1638 }
1639
 1640 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1641 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1642 // in their original order adding them to the correct loop.
1643
1644 // We need a stable insertion order. We use the order of the original loop
1645 // order and map into the correct parent loop.
1646 for (auto *BB : llvm::concat<BasicBlock *const>(
1647 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1648 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1649 OuterL->addBasicBlockToLoop(BB, LI);
1650
1651#ifndef NDEBUG
1652 for (auto &BBAndL : ExitLoopMap) {
1653 auto *BB = BBAndL.first;
1654 auto *OuterL = BBAndL.second;
1655 assert(LI.getLoopFor(BB) == OuterL &&
1656 "Failed to put all blocks into outer loops!");
1657 }
1658#endif
1659
1660 // Now that all the blocks are placed into the correct containing loop in the
1661 // absence of child loops, find all the potentially cloned child loops and
1662 // clone them into whatever outer loop we placed their header into.
1663 for (Loop *ChildL : OrigL) {
1664 auto *ClonedChildHeader =
1665 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1666 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1667 continue;
1668
1669#ifndef NDEBUG
1670 for (auto *ChildLoopBB : ChildL->blocks())
1671 assert(VMap.count(ChildLoopBB) &&
1672 "Cloned a child loop header but not all of that loops blocks!");
1673#endif
1674
1675 NonChildClonedLoops.push_back(cloneLoopNest(
1676 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1677 }
1678}
1679
1680static void
1682 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1683 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1684 // Find all the dead clones, and remove them from their successors.
1686 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1687 for (const auto &VMap : VMaps)
1688 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1689 if (!DT.isReachableFromEntry(ClonedBB)) {
1690 for (BasicBlock *SuccBB : successors(ClonedBB))
1691 SuccBB->removePredecessor(ClonedBB);
1692 DeadBlocks.push_back(ClonedBB);
1693 }
1694
1695 // Remove all MemorySSA in the dead blocks
1696 if (MSSAU) {
1697 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1698 DeadBlocks.end());
1699 MSSAU->removeBlocks(DeadBlockSet);
1700 }
1701
1702 // Drop any remaining references to break cycles.
1703 for (BasicBlock *BB : DeadBlocks)
1704 BB->dropAllReferences();
1705 // Erase them from the IR.
1706 for (BasicBlock *BB : DeadBlocks)
1707 BB->eraseFromParent();
1708}
1709
1712 DominatorTree &DT, LoopInfo &LI,
1713 MemorySSAUpdater *MSSAU,
1714 ScalarEvolution *SE,
1715 LPMUpdater &LoopUpdater) {
1716 // Find all the dead blocks tied to this loop, and remove them from their
1717 // successors.
1719
1720 // Start with loop/exit blocks and get a transitive closure of reachable dead
1721 // blocks.
1722 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1723 ExitBlocks.end());
1724 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1725 while (!DeathCandidates.empty()) {
1726 auto *BB = DeathCandidates.pop_back_val();
1727 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1728 for (BasicBlock *SuccBB : successors(BB)) {
1729 SuccBB->removePredecessor(BB);
1730 DeathCandidates.push_back(SuccBB);
1731 }
1732 DeadBlockSet.insert(BB);
1733 }
1734 }
1735
1736 // Remove all MemorySSA in the dead blocks
1737 if (MSSAU)
1738 MSSAU->removeBlocks(DeadBlockSet);
1739
1740 // Filter out the dead blocks from the exit blocks list so that it can be
1741 // used in the caller.
1742 llvm::erase_if(ExitBlocks,
1743 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1744
1745 // Walk from this loop up through its parents removing all of the dead blocks.
1746 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1747 for (auto *BB : DeadBlockSet)
1748 ParentL->getBlocksSet().erase(BB);
1749 llvm::erase_if(ParentL->getBlocksVector(),
1750 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1751 }
1752
1753 // Now delete the dead child loops. This raw delete will clear them
1754 // recursively.
1755 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1756 if (!DeadBlockSet.count(ChildL->getHeader()))
1757 return false;
1758
1759 assert(llvm::all_of(ChildL->blocks(),
1760 [&](BasicBlock *ChildBB) {
1761 return DeadBlockSet.count(ChildBB);
1762 }) &&
1763 "If the child loop header is dead all blocks in the child loop must "
1764 "be dead as well!");
1765 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1766 if (SE)
1768 LI.destroy(ChildL);
1769 return true;
1770 });
1771
1772 // Remove the loop mappings for the dead blocks and drop all the references
1773 // from these blocks to others to handle cyclic references as we start
1774 // deleting the blocks themselves.
1775 for (auto *BB : DeadBlockSet) {
1776 // Check that the dominator tree has already been updated.
1777 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1778 LI.changeLoopFor(BB, nullptr);
1779 // Drop all uses of the instructions to make sure we won't have dangling
1780 // uses in other blocks.
1781 for (auto &I : *BB)
1782 if (!I.use_empty())
1783 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1784 BB->dropAllReferences();
1785 }
1786
1787 // Actually delete the blocks now that they've been fully unhooked from the
1788 // IR.
1789 for (auto *BB : DeadBlockSet)
1790 BB->eraseFromParent();
1791}
1792
1793/// Recompute the set of blocks in a loop after unswitching.
1794///
1795/// This walks from the original headers predecessors to rebuild the loop. We
1796/// take advantage of the fact that new blocks can't have been added, and so we
1797/// filter by the original loop's blocks. This also handles potentially
1798/// unreachable code that we don't want to explore but might be found examining
1799/// the predecessors of the header.
1800///
1801/// If the original loop is no longer a loop, this will return an empty set. If
1802/// it remains a loop, all the blocks within it will be added to the set
1803/// (including those blocks in inner loops).
1805 LoopInfo &LI) {
1807
1808 auto *PH = L.getLoopPreheader();
1809 auto *Header = L.getHeader();
1810
1811 // A worklist to use while walking backwards from the header.
1813
1814 // First walk the predecessors of the header to find the backedges. This will
1815 // form the basis of our walk.
1816 for (auto *Pred : predecessors(Header)) {
1817 // Skip the preheader.
1818 if (Pred == PH)
1819 continue;
1820
1821 // Because the loop was in simplified form, the only non-loop predecessor
1822 // is the preheader.
1823 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1824 "than the preheader that is not part of the "
1825 "loop!");
1826
1827 // Insert this block into the loop set and on the first visit and, if it
1828 // isn't the header we're currently walking, put it into the worklist to
1829 // recurse through.
1830 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1831 Worklist.push_back(Pred);
1832 }
1833
1834 // If no backedges were found, we're done.
1835 if (LoopBlockSet.empty())
1836 return LoopBlockSet;
1837
1838 // We found backedges, recurse through them to identify the loop blocks.
1839 while (!Worklist.empty()) {
1840 BasicBlock *BB = Worklist.pop_back_val();
1841 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1842
1843 // No need to walk past the header.
1844 if (BB == Header)
1845 continue;
1846
1847 // Because we know the inner loop structure remains valid we can use the
1848 // loop structure to jump immediately across the entire nested loop.
1849 // Further, because it is in loop simplified form, we can directly jump
1850 // to its preheader afterward.
1851 if (Loop *InnerL = LI.getLoopFor(BB))
1852 if (InnerL != &L) {
1853 assert(L.contains(InnerL) &&
1854 "Should not reach a loop *outside* this loop!");
1855 // The preheader is the only possible predecessor of the loop so
1856 // insert it into the set and check whether it was already handled.
1857 auto *InnerPH = InnerL->getLoopPreheader();
1858 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1859 "but not contain the inner loop "
1860 "preheader!");
1861 if (!LoopBlockSet.insert(InnerPH).second)
1862 // The only way to reach the preheader is through the loop body
1863 // itself so if it has been visited the loop is already handled.
1864 continue;
1865
1866 // Insert all of the blocks (other than those already present) into
1867 // the loop set. We expect at least the block that led us to find the
1868 // inner loop to be in the block set, but we may also have other loop
1869 // blocks if they were already enqueued as predecessors of some other
1870 // outer loop block.
1871 for (auto *InnerBB : InnerL->blocks()) {
1872 if (InnerBB == BB) {
1873 assert(LoopBlockSet.count(InnerBB) &&
1874 "Block should already be in the set!");
1875 continue;
1876 }
1877
1878 LoopBlockSet.insert(InnerBB);
1879 }
1880
1881 // Add the preheader to the worklist so we will continue past the
1882 // loop body.
1883 Worklist.push_back(InnerPH);
1884 continue;
1885 }
1886
1887 // Insert any predecessors that were in the original loop into the new
1888 // set, and if the insert is successful, add them to the worklist.
1889 for (auto *Pred : predecessors(BB))
1890 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1891 Worklist.push_back(Pred);
1892 }
1893
1894 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1895
1896 // We've found all the blocks participating in the loop, return our completed
1897 // set.
1898 return LoopBlockSet;
1899}
1900
1901/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1902///
1903/// The removal may have removed some child loops entirely but cannot have
1904/// disturbed any remaining child loops. However, they may need to be hoisted
1905/// to the parent loop (or to be top-level loops). The original loop may be
1906/// completely removed.
1907///
1908/// The sibling loops resulting from this update are returned. If the original
1909/// loop remains a valid loop, it will be the first entry in this list with all
1910/// of the newly sibling loops following it.
1911///
1912/// Returns true if the loop remains a loop after unswitching, and false if it
1913/// is no longer a loop after unswitching (and should not continue to be
1914/// referenced).
1916 LoopInfo &LI,
1917 SmallVectorImpl<Loop *> &HoistedLoops,
1918 ScalarEvolution *SE) {
1919 auto *PH = L.getLoopPreheader();
1920
1921 // Compute the actual parent loop from the exit blocks. Because we may have
1922 // pruned some exits the loop may be different from the original parent.
1923 Loop *ParentL = nullptr;
1924 SmallVector<Loop *, 4> ExitLoops;
1925 SmallVector<BasicBlock *, 4> ExitsInLoops;
1926 ExitsInLoops.reserve(ExitBlocks.size());
1927 for (auto *ExitBB : ExitBlocks)
1928 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1929 ExitLoops.push_back(ExitL);
1930 ExitsInLoops.push_back(ExitBB);
1931 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1932 ParentL = ExitL;
1933 }
1934
1935 // Recompute the blocks participating in this loop. This may be empty if it
1936 // is no longer a loop.
1937 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1938
1939 // If we still have a loop, we need to re-set the loop's parent as the exit
1940 // block set changing may have moved it within the loop nest. Note that this
1941 // can only happen when this loop has a parent as it can only hoist the loop
1942 // *up* the nest.
1943 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1944 // Remove this loop's (original) blocks from all of the intervening loops.
1945 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1946 IL = IL->getParentLoop()) {
1947 IL->getBlocksSet().erase(PH);
1948 for (auto *BB : L.blocks())
1949 IL->getBlocksSet().erase(BB);
1950 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1951 return BB == PH || L.contains(BB);
1952 });
1953 }
1954
1955 LI.changeLoopFor(PH, ParentL);
1956 L.getParentLoop()->removeChildLoop(&L);
1957 if (ParentL)
1958 ParentL->addChildLoop(&L);
1959 else
1960 LI.addTopLevelLoop(&L);
1961 }
1962
1963 // Now we update all the blocks which are no longer within the loop.
1964 auto &Blocks = L.getBlocksVector();
1965 auto BlocksSplitI =
1966 LoopBlockSet.empty()
1967 ? Blocks.begin()
1968 : std::stable_partition(
1969 Blocks.begin(), Blocks.end(),
1970 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1971
1972 // Before we erase the list of unlooped blocks, build a set of them.
1973 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1974 if (LoopBlockSet.empty())
1975 UnloopedBlocks.insert(PH);
1976
1977 // Now erase these blocks from the loop.
1978 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1979 L.getBlocksSet().erase(BB);
1980 Blocks.erase(BlocksSplitI, Blocks.end());
1981
1982 // Sort the exits in ascending loop depth, we'll work backwards across these
1983 // to process them inside out.
1984 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1985 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1986 });
1987
1988 // We'll build up a set for each exit loop.
1989 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1990 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1991
1992 auto RemoveUnloopedBlocksFromLoop =
1993 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1994 for (auto *BB : UnloopedBlocks)
1995 L.getBlocksSet().erase(BB);
1996 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1997 return UnloopedBlocks.count(BB);
1998 });
1999 };
2000
2002 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2003 assert(Worklist.empty() && "Didn't clear worklist!");
2004 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2005
2006 // Grab the next exit block, in decreasing loop depth order.
2007 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2008 Loop &ExitL = *LI.getLoopFor(ExitBB);
2009 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2010
2011 // Erase all of the unlooped blocks from the loops between the previous
2012 // exit loop and this exit loop. This works because the ExitInLoops list is
2013 // sorted in increasing order of loop depth and thus we visit loops in
2014 // decreasing order of loop depth.
2015 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2016 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2017
2018 // Walk the CFG back until we hit the cloned PH adding everything reachable
2019 // and in the unlooped set to this exit block's loop.
2020 Worklist.push_back(ExitBB);
2021 do {
2022 BasicBlock *BB = Worklist.pop_back_val();
2023 // We can stop recursing at the cloned preheader (if we get there).
2024 if (BB == PH)
2025 continue;
2026
2027 for (BasicBlock *PredBB : predecessors(BB)) {
2028 // If this pred has already been moved to our set or is part of some
2029 // (inner) loop, no update needed.
2030 if (!UnloopedBlocks.erase(PredBB)) {
2031 assert((NewExitLoopBlocks.count(PredBB) ||
2032 ExitL.contains(LI.getLoopFor(PredBB))) &&
2033 "Predecessor not in a nested loop (or already visited)!");
2034 continue;
2035 }
2036
2037 // We just insert into the loop set here. We'll add these blocks to the
2038 // exit loop after we build up the set in a deterministic order rather
2039 // than the predecessor-influenced visit order.
2040 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2041 (void)Inserted;
2042 assert(Inserted && "Should only visit an unlooped block once!");
2043
2044 // And recurse through to its predecessors.
2045 Worklist.push_back(PredBB);
2046 }
2047 } while (!Worklist.empty());
2048
2049 // If blocks in this exit loop were directly part of the original loop (as
2050 // opposed to a child loop) update the map to point to this exit loop. This
2051 // just updates a map and so the fact that the order is unstable is fine.
2052 for (auto *BB : NewExitLoopBlocks)
2053 if (Loop *BBL = LI.getLoopFor(BB))
2054 if (BBL == &L || !L.contains(BBL))
2055 LI.changeLoopFor(BB, &ExitL);
2056
2057 // We will remove the remaining unlooped blocks from this loop in the next
2058 // iteration or below.
2059 NewExitLoopBlocks.clear();
2060 }
2061
2062 // Any remaining unlooped blocks are no longer part of any loop unless they
2063 // are part of some child loop.
2064 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2065 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2066 for (auto *BB : UnloopedBlocks)
2067 if (Loop *BBL = LI.getLoopFor(BB))
2068 if (BBL == &L || !L.contains(BBL))
2069 LI.changeLoopFor(BB, nullptr);
2070
2071 // Sink all the child loops whose headers are no longer in the loop set to
2072 // the parent (or to be top level loops). We reach into the loop and directly
2073 // update its subloop vector to make this batch update efficient.
2074 auto &SubLoops = L.getSubLoopsVector();
2075 auto SubLoopsSplitI =
2076 LoopBlockSet.empty()
2077 ? SubLoops.begin()
2078 : std::stable_partition(
2079 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2080 return LoopBlockSet.count(SubL->getHeader());
2081 });
2082 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2083 HoistedLoops.push_back(HoistedL);
2084 HoistedL->setParentLoop(nullptr);
2085
2086 // To compute the new parent of this hoisted loop we look at where we
2087 // placed the preheader above. We can't lookup the header itself because we
2088 // retained the mapping from the header to the hoisted loop. But the
2089 // preheader and header should have the exact same new parent computed
2090 // based on the set of exit blocks from the original loop as the preheader
2091 // is a predecessor of the header and so reached in the reverse walk. And
2092 // because the loops were all in simplified form the preheader of the
2093 // hoisted loop can't be part of some *other* loop.
2094 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2095 NewParentL->addChildLoop(HoistedL);
2096 else
2097 LI.addTopLevelLoop(HoistedL);
2098 }
2099 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2100
2101 // Actually delete the loop if nothing remained within it.
2102 if (Blocks.empty()) {
2103 assert(SubLoops.empty() &&
2104 "Failed to remove all subloops from the original loop!");
2105 if (Loop *ParentL = L.getParentLoop())
2106 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2107 else
2108 LI.removeLoop(llvm::find(LI, &L));
2109 // markLoopAsDeleted for L should be triggered by the caller (it is
2110 // typically done within postUnswitch).
2111 if (SE)
2113 LI.destroy(&L);
2114 return false;
2115 }
2116
2117 return true;
2118}
2119
2120/// Helper to visit a dominator subtree, invoking a callable on each node.
2121///
2122/// Returning false at any point will stop walking past that node of the tree.
2123template <typename CallableT>
2124void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2126 DomWorklist.push_back(DT[BB]);
2127#ifndef NDEBUG
2129 Visited.insert(DT[BB]);
2130#endif
2131 do {
2132 DomTreeNode *N = DomWorklist.pop_back_val();
2133
2134 // Visit this node.
2135 if (!Callable(N->getBlock()))
2136 continue;
2137
2138 // Accumulate the child nodes.
2139 for (DomTreeNode *ChildN : *N) {
2140 assert(Visited.insert(ChildN).second &&
2141 "Cannot visit a node twice when walking a tree!");
2142 DomWorklist.push_back(ChildN);
2143 }
2144 } while (!DomWorklist.empty());
2145}
2146
2148 bool CurrentLoopValid, bool PartiallyInvariant,
2149 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2150 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2151 if (!NewLoops.empty())
2152 U.addSiblingLoops(NewLoops);
2153
2154 // If the current loop remains valid, we should revisit it to catch any
2155 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2156 if (CurrentLoopValid) {
2157 if (PartiallyInvariant) {
2158 // Mark the new loop as partially unswitched, to avoid unswitching on
2159 // the same condition again.
2160 auto &Context = L.getHeader()->getContext();
2161 MDNode *DisableUnswitchMD = MDNode::get(
2162 Context,
2163 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2165 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2166 {DisableUnswitchMD});
2167 L.setLoopID(NewLoopID);
2168 } else if (InjectedCondition) {
2169 // Do the same for injection of invariant conditions.
2170 auto &Context = L.getHeader()->getContext();
2171 MDNode *DisableUnswitchMD = MDNode::get(
2172 Context,
2173 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2175 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2176 {DisableUnswitchMD});
2177 L.setLoopID(NewLoopID);
2178 } else
2179 U.revisitCurrentLoop();
2180 } else
2181 U.markLoopAsDeleted(L, LoopName);
2182}
2183
2185 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2186 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2188 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2189 auto *ParentBB = TI.getParent();
2191 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2192
2193 // Save the current loop name in a variable so that we can report it even
2194 // after it has been deleted.
2195 std::string LoopName(L.getName());
2196
2197 // We can only unswitch switches, conditional branches with an invariant
2198 // condition, or combining invariant conditions with an instruction or
2199 // partially invariant instructions.
2200 assert((SI || (BI && BI->isConditional())) &&
2201 "Can only unswitch switches and conditional branch!");
2202 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2203 bool FullUnswitch =
2204 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2205 !PartiallyInvariant);
2206 if (FullUnswitch)
2207 assert(Invariants.size() == 1 &&
2208 "Cannot have other invariants with full unswitching!");
2209 else
2211 "Partial unswitching requires an instruction as the condition!");
2212
2213 if (MSSAU && VerifyMemorySSA)
2214 MSSAU->getMemorySSA()->verifyMemorySSA();
2215
2216 // Constant and BBs tracking the cloned and continuing successor. When we are
2217 // unswitching the entire condition, this can just be trivially chosen to
2218 // unswitch towards `true`. However, when we are unswitching a set of
2219 // invariants combined with `and` or `or` or partially invariant instructions,
2220 // the combining operation determines the best direction to unswitch: we want
2221 // to unswitch the direction that will collapse the branch.
2222 bool Direction = true;
2223 int ClonedSucc = 0;
2224 if (!FullUnswitch) {
2226 (void)Cond;
2228 PartiallyInvariant) &&
2229 "Only `or`, `and`, an `select`, partially invariant instructions "
2230 "can combine invariants being unswitched.");
2231 if (!match(Cond, m_LogicalOr())) {
2232 if (match(Cond, m_LogicalAnd()) ||
2233 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2234 Direction = false;
2235 ClonedSucc = 1;
2236 }
2237 }
2238 }
2239
2240 BasicBlock *RetainedSuccBB =
2241 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2242 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2243 if (BI)
2244 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2245 else
2246 for (auto Case : SI->cases())
2247 if (Case.getCaseSuccessor() != RetainedSuccBB)
2248 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2249
2250 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2251 "Should not unswitch the same successor we are retaining!");
2252
2253 // The branch should be in this exact loop. Any inner loop's invariant branch
2254 // should be handled by unswitching that inner loop. The caller of this
2255 // routine should filter out any candidates that remain (but were skipped for
2256 // whatever reason).
2257 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2258
2259 // Compute the parent loop now before we start hacking on things.
2260 Loop *ParentL = L.getParentLoop();
2261 // Get blocks in RPO order for MSSA update, before changing the CFG.
2262 LoopBlocksRPO LBRPO(&L);
2263 if (MSSAU)
2264 LBRPO.perform(&LI);
2265
2266 // Compute the outer-most loop containing one of our exit blocks. This is the
2267 // furthest up our loopnest which can be mutated, which we will use below to
2268 // update things.
2269 Loop *OuterExitL = &L;
2271 L.getUniqueExitBlocks(ExitBlocks);
2272 for (auto *ExitBB : ExitBlocks) {
2273 // ExitBB can be an exit block for several levels in the loop nest. Make
2274 // sure we find the top most.
2275 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2276 if (!NewOuterExitL) {
2277 // We exited the entire nest with this block, so we're done.
2278 OuterExitL = nullptr;
2279 break;
2280 }
2281 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2282 OuterExitL = NewOuterExitL;
2283 }
2284
2285 // At this point, we're definitely going to unswitch something so invalidate
2286 // any cached information in ScalarEvolution for the outer most loop
2287 // containing an exit block and all nested loops.
2288 if (SE) {
2289 if (OuterExitL)
2290 SE->forgetLoop(OuterExitL);
2291 else
2292 SE->forgetTopmostLoop(&L);
2294 }
2295
2296 // If the edge from this terminator to a successor dominates that successor,
2297 // store a map from each block in its dominator subtree to it. This lets us
2298 // tell when cloning for a particular successor if a block is dominated by
2299 // some *other* successor with a single data structure. We use this to
2300 // significantly reduce cloning.
2302 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2303 UnswitchedSuccBBs))
2304 if (SuccBB->getUniquePredecessor() ||
2305 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2306 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2307 }))
2308 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2309 DominatingSucc[BB] = SuccBB;
2310 return true;
2311 });
2312
2313 // Split the preheader, so that we know that there is a safe place to insert
2314 // the conditional branch. We will change the preheader to have a conditional
2315 // branch on LoopCond. The original preheader will become the split point
2316 // between the unswitched versions, and we will have a new preheader for the
2317 // original loop.
2318 BasicBlock *SplitBB = L.getLoopPreheader();
2319 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2320
2321 // Keep track of the dominator tree updates needed.
2323
2324 // Clone the loop for each unswitched successor.
2326 VMaps.reserve(UnswitchedSuccBBs.size());
2328 for (auto *SuccBB : UnswitchedSuccBBs) {
2329 VMaps.emplace_back(new ValueToValueMapTy());
2330 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2331 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2332 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2333 }
2334
2335 // Drop metadata if we may break its semantics by moving this instr into the
2336 // split block.
2337 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2339 // Do not spend time trying to understand if we can keep it, just drop it
2340 // to save compile time.
2341 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2342 else {
2343 // It is only legal to preserve make.implicit metadata if we are
2344 // guaranteed no reach implicit null check after following this branch.
2345 ICFLoopSafetyInfo SafetyInfo;
2346 SafetyInfo.computeLoopSafetyInfo(&L);
2347 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2348 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2349 }
2350 }
2351
2352 // The stitching of the branched code back together depends on whether we're
2353 // doing full unswitching or not with the exception that we always want to
2354 // nuke the initial terminator placed in the split block.
2355 SplitBB->getTerminator()->eraseFromParent();
2356 if (FullUnswitch) {
2357 // Keep a clone of the terminator for MSSA updates.
2358 Instruction *NewTI = TI.clone();
2359 NewTI->insertInto(ParentBB, ParentBB->end());
2360
2361 // Splice the terminator from the original loop and rewrite its
2362 // successors.
2363 TI.moveBefore(*SplitBB, SplitBB->end());
2364 TI.dropLocation();
2365
2366 // First wire up the moved terminator to the preheaders.
2367 if (BI) {
2368 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2369 BI->setSuccessor(ClonedSucc, ClonedPH);
2370 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2372 if (InsertFreeze) {
2373 // We don't give any debug location to the new freeze, because the
2374 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2375 // out of the loop.
2376 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2378 }
2379 BI->setCondition(Cond);
2380 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2381 } else {
2382 assert(SI && "Must either be a branch or switch!");
2383
2384 // Walk the cases and directly update their successors.
2385 assert(SI->getDefaultDest() == RetainedSuccBB &&
2386 "Not retaining default successor!");
2387 SI->setDefaultDest(LoopPH);
2388 for (const auto &Case : SI->cases())
2389 if (Case.getCaseSuccessor() == RetainedSuccBB)
2390 Case.setSuccessor(LoopPH);
2391 else
2392 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2393
2394 if (InsertFreeze)
2395 SI->setCondition(new FreezeInst(SI->getCondition(),
2396 SI->getCondition()->getName() + ".fr",
2397 SI->getIterator()));
2398
2399 // We need to use the set to populate domtree updates as even when there
2400 // are multiple cases pointing at the same successor we only want to
2401 // remove and insert one edge in the domtree.
2402 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2403 DTUpdates.push_back(
2404 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2405 }
2406
2407 if (MSSAU) {
2408 DT.applyUpdates(DTUpdates);
2409 DTUpdates.clear();
2410
2411 // Remove all but one edge to the retained block and all unswitched
2412 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2413 // when we know we only keep a single edge for each case.
2414 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2415 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2416 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2417
2418 for (auto &VMap : VMaps)
2419 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2420 /*IgnoreIncomingWithNoClones=*/true);
2421 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2422
2423 // Remove all edges to unswitched blocks.
2424 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2425 MSSAU->removeEdge(ParentBB, SuccBB);
2426 }
2427
2428 // Now unhook the successor relationship as we'll be replacing
2429 // the terminator with a direct branch. This is much simpler for branches
2430 // than switches so we handle those first.
2431 if (BI) {
2432 // Remove the parent as a predecessor of the unswitched successor.
2433 assert(UnswitchedSuccBBs.size() == 1 &&
2434 "Only one possible unswitched block for a branch!");
2435 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2436 UnswitchedSuccBB->removePredecessor(ParentBB,
2437 /*KeepOneInputPHIs*/ true);
2438 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2439 } else {
2440 // Note that we actually want to remove the parent block as a predecessor
2441 // of *every* case successor. The case successor is either unswitched,
2442 // completely eliminating an edge from the parent to that successor, or it
2443 // is a duplicate edge to the retained successor as the retained successor
2444 // is always the default successor and as we'll replace this with a direct
2445 // branch we no longer need the duplicate entries in the PHI nodes.
2446 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2447 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2448 "Not retaining default successor!");
2449 for (const auto &Case : NewSI->cases())
2450 Case.getCaseSuccessor()->removePredecessor(
2451 ParentBB,
2452 /*KeepOneInputPHIs*/ true);
2453
2454 // We need to use the set to populate domtree updates as even when there
2455 // are multiple cases pointing at the same successor we only want to
2456 // remove and insert one edge in the domtree.
2457 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2458 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2459 }
2460
2461 // Create a new unconditional branch to the continuing block (as opposed to
2462 // the one cloned).
2463 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2464 NewBI->setDebugLoc(NewTI->getDebugLoc());
2465
2466 // After MSSAU update, remove the cloned terminator instruction NewTI.
2467 NewTI->eraseFromParent();
2468 } else {
2469 assert(BI && "Only branches have partial unswitching.");
2470 assert(UnswitchedSuccBBs.size() == 1 &&
2471 "Only one possible unswitched block for a branch!");
2472 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2473 // When doing a partial unswitch, we have to do a bit more work to build up
2474 // the branch in the split block.
2475 if (PartiallyInvariant)
2477 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2478 else {
2480 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2481 FreezeLoopUnswitchCond, BI, &AC, DT);
2482 }
2483 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2484
2485 if (MSSAU) {
2486 DT.applyUpdates(DTUpdates);
2487 DTUpdates.clear();
2488
2489 // Perform MSSA cloning updates.
2490 for (auto &VMap : VMaps)
2491 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2492 /*IgnoreIncomingWithNoClones=*/true);
2493 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2494 }
2495 }
2496
2497 // Apply the updates accumulated above to get an up-to-date dominator tree.
2498 DT.applyUpdates(DTUpdates);
2499
2500 // Now that we have an accurate dominator tree, first delete the dead cloned
2501 // blocks so that we can accurately build any cloned loops. It is important to
2502 // not delete the blocks from the original loop yet because we still want to
2503 // reference the original loop to understand the cloned loop's structure.
2504 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2505
2506 // Build the cloned loop structure itself. This may be substantially
2507 // different from the original structure due to the simplified CFG. This also
2508 // handles inserting all the cloned blocks into the correct loops.
2509 SmallVector<Loop *, 4> NonChildClonedLoops;
2510 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2511 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2512
2513 // Now that our cloned loops have been built, we can update the original loop.
2514 // First we delete the dead blocks from it and then we rebuild the loop
2515 // structure taking these deletions into account.
2516 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2517
2518 if (MSSAU && VerifyMemorySSA)
2519 MSSAU->getMemorySSA()->verifyMemorySSA();
2520
2521 SmallVector<Loop *, 4> HoistedLoops;
2522 bool IsStillLoop =
2523 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2524
2525 if (MSSAU && VerifyMemorySSA)
2526 MSSAU->getMemorySSA()->verifyMemorySSA();
2527
2528 // This transformation has a high risk of corrupting the dominator tree, and
2529 // the below steps to rebuild loop structures will result in hard to debug
2530 // errors in that case so verify that the dominator tree is sane first.
2531 // FIXME: Remove this when the bugs stop showing up and rely on existing
2532 // verification steps.
2533 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2534
2535 if (BI && !PartiallyInvariant) {
2536 // If we unswitched a branch which collapses the condition to a known
2537 // constant we want to replace all the uses of the invariants within both
2538 // the original and cloned blocks. We do this here so that we can use the
2539 // now updated dominator tree to identify which side the users are on.
2540 assert(UnswitchedSuccBBs.size() == 1 &&
2541 "Only one possible unswitched block for a branch!");
2542 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2543
2544 // When considering multiple partially-unswitched invariants
2545 // we cant just go replace them with constants in both branches.
2546 //
2547 // For 'AND' we infer that true branch ("continue") means true
2548 // for each invariant operand.
2549 // For 'OR' we can infer that false branch ("continue") means false
2550 // for each invariant operand.
2551 // So it happens that for multiple-partial case we dont replace
2552 // in the unswitched branch.
2553 bool ReplaceUnswitched =
2554 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2555
2556 ConstantInt *UnswitchedReplacement =
2559 ConstantInt *ContinueReplacement =
2562 for (Value *Invariant : Invariants) {
2563 assert(!isa<Constant>(Invariant) &&
2564 "Should not be replacing constant values!");
2565 // Use make_early_inc_range here as set invalidates the iterator.
2566 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2567 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2568 if (!UserI)
2569 continue;
2570
2571 // Replace it with the 'continue' side if in the main loop body, and the
2572 // unswitched if in the cloned blocks.
2573 if (DT.dominates(LoopPH, UserI->getParent()))
2574 U.set(ContinueReplacement);
2575 else if (ReplaceUnswitched &&
2576 DT.dominates(ClonedPH, UserI->getParent()))
2577 U.set(UnswitchedReplacement);
2578 }
2579 }
2580 }
2581
2582 // We can change which blocks are exit blocks of all the cloned sibling
2583 // loops, the current loop, and any parent loops which shared exit blocks
2584 // with the current loop. As a consequence, we need to re-form LCSSA for
2585 // them. But we shouldn't need to re-form LCSSA for any child loops.
2586 // FIXME: This could be made more efficient by tracking which exit blocks are
2587 // new, and focusing on them, but that isn't likely to be necessary.
2588 //
2589 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2590 // loop nest and update every loop that could have had its exits changed. We
2591 // also need to cover any intervening loops. We add all of these loops to
2592 // a list and sort them by loop depth to achieve this without updating
2593 // unnecessary loops.
2594 auto UpdateLoop = [&](Loop &UpdateL) {
2595#ifndef NDEBUG
2596 UpdateL.verifyLoop();
2597 for (Loop *ChildL : UpdateL) {
2598 ChildL->verifyLoop();
2599 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2600 "Perturbed a child loop's LCSSA form!");
2601 }
2602#endif
2603 // First build LCSSA for this loop so that we can preserve it when
2604 // forming dedicated exits. We don't want to perturb some other loop's
2605 // LCSSA while doing that CFG edit.
2606 formLCSSA(UpdateL, DT, &LI, SE);
2607
2608 // For loops reached by this loop's original exit blocks we may
2609 // introduced new, non-dedicated exits. At least try to re-form dedicated
2610 // exits for these loops. This may fail if they couldn't have dedicated
2611 // exits to start with.
2612 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2613 };
2614
2615 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2616 // and we can do it in any order as they don't nest relative to each other.
2617 //
2618 // Also check if any of the loops we have updated have become top-level loops
2619 // as that will necessitate widening the outer loop scope.
2620 for (Loop *UpdatedL :
2621 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2622 UpdateLoop(*UpdatedL);
2623 if (UpdatedL->isOutermost())
2624 OuterExitL = nullptr;
2625 }
2626 if (IsStillLoop) {
2627 UpdateLoop(L);
2628 if (L.isOutermost())
2629 OuterExitL = nullptr;
2630 }
2631
2632 // If the original loop had exit blocks, walk up through the outer most loop
2633 // of those exit blocks to update LCSSA and form updated dedicated exits.
2634 if (OuterExitL != &L)
2635 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2636 OuterL = OuterL->getParentLoop())
2637 UpdateLoop(*OuterL);
2638
2639#ifndef NDEBUG
2640 // Verify the entire loop structure to catch any incorrect updates before we
2641 // progress in the pass pipeline.
2642 LI.verify(DT);
2643#endif
2644
2645 // Now that we've unswitched something, make callbacks to report the changes.
2646 // For that we need to merge together the updated loops and the cloned loops
2647 // and check whether the original loop survived.
2648 SmallVector<Loop *, 4> SibLoops;
2649 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2650 if (UpdatedL->getParentLoop() == ParentL)
2651 SibLoops.push_back(UpdatedL);
2652 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2653 InjectedCondition, SibLoops);
2654
2655 if (MSSAU && VerifyMemorySSA)
2656 MSSAU->getMemorySSA()->verifyMemorySSA();
2657
2658 if (BI)
2659 ++NumBranches;
2660 else
2661 ++NumSwitches;
2662}
2663
2664/// Recursively compute the cost of a dominator subtree based on the per-block
2665/// cost map provided.
2666///
2667/// The recursive computation is memozied into the provided DT-indexed cost map
2668/// to allow querying it for most nodes in the domtree without it becoming
2669/// quadratic.
2671 DomTreeNode &N,
2674 // Don't accumulate cost (or recurse through) blocks not in our block cost
2675 // map and thus not part of the duplication cost being considered.
2676 auto BBCostIt = BBCostMap.find(N.getBlock());
2677 if (BBCostIt == BBCostMap.end())
2678 return 0;
2679
2680 // Lookup this node to see if we already computed its cost.
2681 auto DTCostIt = DTCostMap.find(&N);
2682 if (DTCostIt != DTCostMap.end())
2683 return DTCostIt->second;
2684
2685 // If not, we have to compute it. We can't use insert above and update
2686 // because computing the cost may insert more things into the map.
2687 InstructionCost Cost = std::accumulate(
2688 N.begin(), N.end(), BBCostIt->second,
2689 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2690 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2691 });
2692 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2693 (void)Inserted;
2694 assert(Inserted && "Should not insert a node while visiting children!");
2695 return Cost;
2696}
2697
2698/// Turns a select instruction into implicit control flow branch,
2699/// making the following replacement:
2700///
2701/// head:
2702/// --code before select--
2703/// select %cond, %trueval, %falseval
2704/// --code after select--
2705///
2706/// into
2707///
2708/// head:
2709/// --code before select--
2710/// br i1 %cond, label %then, label %tail
2711///
2712/// then:
2713/// br %tail
2714///
2715/// tail:
2716/// phi [ %trueval, %then ], [ %falseval, %head]
2717/// unreachable
2718///
2719/// It also makes all relevant DT and LI updates, so that all structures are in
2720/// valid state after this transform.
2722 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2723 AssumptionCache *AC) {
2724 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2725 BasicBlock *HeadBB = SI->getParent();
2726
2727 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2728 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2729 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2730 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2731 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2732 *TailBB = CondBr->getSuccessor(1);
2733 if (MSSAU)
2734 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2735
2736 PHINode *Phi =
2737 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2738 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2739 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2740 Phi->setDebugLoc(SI->getDebugLoc());
2741 SI->replaceAllUsesWith(Phi);
2742 SI->eraseFromParent();
2743
2744 if (MSSAU && VerifyMemorySSA)
2745 MSSAU->getMemorySSA()->verifyMemorySSA();
2746
2747 ++NumSelects;
2748 return CondBr;
2749}
2750
2751/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2752/// making the following replacement:
2753///
2754/// --code before guard--
2755/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2756/// --code after guard--
2757///
2758/// into
2759///
2760/// --code before guard--
2761/// br i1 %cond, label %guarded, label %deopt
2762///
2763/// guarded:
2764/// --code after guard--
2765///
2766/// deopt:
2767/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2768/// unreachable
2769///
2770/// It also makes all relevant DT and LI updates, so that all structures are in
2771/// valid state after this transform.
2773 DominatorTree &DT, LoopInfo &LI,
2774 MemorySSAUpdater *MSSAU) {
2775 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2776 BasicBlock *CheckBB = GI->getParent();
2777
2778 if (MSSAU && VerifyMemorySSA)
2779 MSSAU->getMemorySSA()->verifyMemorySSA();
2780
2781 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2782 Instruction *DeoptBlockTerm =
2784 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2785 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2786 // SplitBlockAndInsertIfThen inserts control flow that branches to
2787 // DeoptBlockTerm if the condition is true. We want the opposite.
2788 CheckBI->swapSuccessors();
2789
2790 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2791 GuardedBlock->setName("guarded");
2792 CheckBI->getSuccessor(1)->setName("deopt");
2793 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2794
2795 if (MSSAU)
2796 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2797
2798 GI->moveBefore(DeoptBlockTerm->getIterator());
2800
2801 if (MSSAU) {
2803 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2804 if (VerifyMemorySSA)
2805 MSSAU->getMemorySSA()->verifyMemorySSA();
2806 }
2807
2808 if (VerifyLoopInfo)
2809 LI.verify(DT);
2810 ++NumGuards;
2811 return CheckBI;
2812}
2813
2814/// Cost multiplier is a way to limit potentially exponential behavior
2815/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2816/// candidates available. Also consider the number of "sibling" loops with
2817/// the idea of accounting for previous unswitches that already happened on this
2818/// cluster of loops. There was an attempt to keep this formula simple,
2819/// just enough to limit the worst case behavior. Even if it is not that simple
2820/// now it is still not an attempt to provide a detailed heuristic size
2821/// prediction.
2822///
2823/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2824/// unswitch candidates, making adequate predictions instead of wild guesses.
2825/// That requires knowing not just the number of "remaining" candidates but
2826/// also costs of unswitching for each of these candidates.
2828 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2829 const DominatorTree &DT,
2830 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2831
2832 // Guards and other exiting conditions do not contribute to exponential
2833 // explosion as soon as they dominate the latch (otherwise there might be
2834 // another path to the latch remaining that does not allow to eliminate the
2835 // loop copy on unswitch).
2836 const BasicBlock *Latch = L.getLoopLatch();
2837 const BasicBlock *CondBlock = TI.getParent();
2838 if (DT.dominates(CondBlock, Latch) &&
2839 (isGuard(&TI) ||
2840 (TI.isTerminator() &&
2841 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2842 return L.contains(SuccBB);
2843 }) <= 1))) {
2844 NumCostMultiplierSkipped++;
2845 return 1;
2846 }
2847
2848 // Each invariant non-trivial condition, after being unswitched, is supposed
2849 // to have its own specialized sibling loop (the invariant condition has been
2850 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2851 // conditions in nested loops, the basic block size of the outer loop should
2852 // not be altered. If such a size significantly increases across unswitching
2853 // invocations, something may be wrong; so adjust the final cost taking this
2854 // into account.
2855 auto *ParentL = L.getParentLoop();
2856 int ParentLoopSizeMultiplier = 1;
2857 if (ParentL)
2858 ParentLoopSizeMultiplier =
2859 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2860
2861 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2862 : std::distance(LI.begin(), LI.end()));
2863 // Count amount of clones that all the candidates might cause during
2864 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2865 // cases.
2866 int UnswitchedClones = 0;
2867 for (const auto &Candidate : UnswitchCandidates) {
2868 const Instruction *CI = Candidate.TI;
2869 const BasicBlock *CondBlock = CI->getParent();
2870 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2871 if (isa<SelectInst>(CI)) {
2872 UnswitchedClones++;
2873 continue;
2874 }
2875 if (isGuard(CI)) {
2876 if (!SkipExitingSuccessors)
2877 UnswitchedClones++;
2878 continue;
2879 }
2880 int NonExitingSuccessors =
2881 llvm::count_if(successors(CondBlock),
2882 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2883 return !SkipExitingSuccessors || L.contains(SuccBB);
2884 });
2885 UnswitchedClones += Log2_32(NonExitingSuccessors);
2886 }
2887
2888 // Ignore up to the "unscaled candidates" number of unswitch candidates
2889 // when calculating the power-of-two scaling of the cost. The main idea
2890 // with this control is to allow a small number of unswitches to happen
2891 // and rely more on siblings multiplier (see below) when the number
2892 // of candidates is small.
2893 unsigned ClonesPower =
2894 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2895
2896 // Allowing top-level loops to spread a bit more than nested ones.
2897 int SiblingsMultiplier =
2898 std::max((ParentL ? SiblingsCount
2899 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2900 1);
2901 // Compute the cost multiplier in a way that won't overflow by saturating
2902 // at an upper bound.
2903 int CostMultiplier;
2904 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2905 SiblingsMultiplier > UnswitchThreshold ||
2906 ParentLoopSizeMultiplier > UnswitchThreshold)
2907 CostMultiplier = UnswitchThreshold;
2908 else
2909 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2910 (int)UnswitchThreshold);
2911
2912 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2913 << " (siblings " << SiblingsMultiplier << " * parent size "
2914 << ParentLoopSizeMultiplier << " * clones "
2915 << (1 << ClonesPower) << ")"
2916 << " for unswitch candidate: " << TI << "\n");
2917 return CostMultiplier;
2918}
2919
2922 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2923 const Loop &L, const LoopInfo &LI, AAResults &AA,
2924 const MemorySSAUpdater *MSSAU) {
2925 assert(UnswitchCandidates.empty() && "Should be!");
2926
2927 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2929 if (isa<Constant>(Cond))
2930 return;
2931 if (L.isLoopInvariant(Cond)) {
2932 UnswitchCandidates.push_back({I, {Cond}});
2933 return;
2934 }
2936 TinyPtrVector<Value *> Invariants =
2938 L, *static_cast<Instruction *>(Cond), LI);
2939 if (!Invariants.empty())
2940 UnswitchCandidates.push_back({I, std::move(Invariants)});
2941 }
2942 };
2943
2944 // Whether or not we should also collect guards in the loop.
2945 bool CollectGuards = false;
2946 if (UnswitchGuards) {
2947 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
2948 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2949 if (GuardDecl && !GuardDecl->use_empty())
2950 CollectGuards = true;
2951 }
2952
2953 for (auto *BB : L.blocks()) {
2954 if (LI.getLoopFor(BB) != &L)
2955 continue;
2956
2957 for (auto &I : *BB) {
2958 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2959 auto *Cond = SI->getCondition();
2960 // Do not unswitch vector selects and logical and/or selects
2961 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2962 AddUnswitchCandidatesForInst(SI, Cond);
2963 } else if (CollectGuards && isGuard(&I)) {
2964 auto *Cond =
2965 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2966 // TODO: Support AND, OR conditions and partial unswitching.
2967 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2968 UnswitchCandidates.push_back({&I, {Cond}});
2969 }
2970 }
2971
2972 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2973 // We can only consider fully loop-invariant switch conditions as we need
2974 // to completely eliminate the switch after unswitching.
2975 if (!isa<Constant>(SI->getCondition()) &&
2976 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2977 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2978 continue;
2979 }
2980
2981 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2982 if (!BI || !BI->isConditional() ||
2983 BI->getSuccessor(0) == BI->getSuccessor(1))
2984 continue;
2985
2986 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2987 }
2988
2989 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2990 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2991 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2992 })) {
2993 MemorySSA *MSSA = MSSAU->getMemorySSA();
2994 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2995 LLVM_DEBUG(
2996 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2997 << *Info->InstToDuplicate[0] << "\n");
2998 PartialIVInfo = *Info;
2999 PartialIVCondBranch = L.getHeader()->getTerminator();
3000 TinyPtrVector<Value *> ValsToDuplicate;
3001 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3002 UnswitchCandidates.push_back(
3003 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3004 }
3005 }
3006 return !UnswitchCandidates.empty();
3007}
3008
3009/// Tries to canonicalize condition described by:
3010///
3011/// br (LHS pred RHS), label IfTrue, label IfFalse
3012///
3013/// into its equivalent where `Pred` is something that we support for injected
3014/// invariants (so far it is limited to ult), LHS in canonicalized form is
3015/// non-invariant and RHS is an invariant.
3017 Value *&LHS, Value *&RHS,
3018 BasicBlock *&IfTrue,
3019 BasicBlock *&IfFalse,
3020 const Loop &L) {
3021 if (!L.contains(IfTrue)) {
3022 Pred = ICmpInst::getInversePredicate(Pred);
3023 std::swap(IfTrue, IfFalse);
3024 }
3025
3026 // Move loop-invariant argument to RHS position.
3027 if (L.isLoopInvariant(LHS)) {
3028 Pred = ICmpInst::getSwappedPredicate(Pred);
3029 std::swap(LHS, RHS);
3030 }
3031
3032 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3033 // Turn "x >=s 0" into "x <u UMIN_INT"
3034 Pred = ICmpInst::ICMP_ULT;
3035 RHS = ConstantInt::get(
3036 RHS->getContext(),
3037 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3038 }
3039}
3040
3041/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3042/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3043/// injecting a loop-invariant condition.
3045 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3046 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3047 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3048 return false;
3049 // TODO: Support other predicates.
3050 if (Pred != ICmpInst::ICMP_ULT)
3051 return false;
3052 // TODO: Support non-loop-exiting branches?
3053 if (!L.contains(IfTrue) || L.contains(IfFalse))
3054 return false;
3055 // FIXME: For some reason this causes problems with MSSA updates, need to
3056 // investigate why. So far, just don't unswitch latch.
3057 if (L.getHeader() == IfTrue)
3058 return false;
3059 return true;
3060}
3061
3062/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3063/// TakenSucc via injection of invariant conditions. The branch should be not
3064/// enough and not previously unswitched, the information about this comes from
3065/// the metadata.
3067 const BasicBlock *TakenSucc) {
3068 SmallVector<uint32_t> Weights;
3069 if (!extractBranchWeights(*BI, Weights))
3070 return false;
3072 BranchProbability LikelyTaken(T - 1, T);
3073
3074 assert(Weights.size() == 2 && "Unexpected profile data!");
3075 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3076 auto Num = Weights[Idx];
3077 auto Denom = Weights[0] + Weights[1];
3078 // Degenerate or overflowed metadata.
3079 if (Denom == 0 || Num > Denom)
3080 return false;
3081 BranchProbability ActualTaken(Num, Denom);
3082 if (LikelyTaken > ActualTaken)
3083 return false;
3084 return true;
3085}
3086
3087/// Materialize pending invariant condition of the given candidate into IR. The
3088/// injected loop-invariant condition implies the original loop-variant branch
3089/// condition, so the materialization turns
3090///
3091/// loop_block:
3092/// ...
3093/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3094///
3095/// into
3096///
3097/// preheader:
3098/// %invariant_cond = LHS pred RHS
3099/// ...
3100/// loop_block:
3101/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3102/// OriginalCheck:
3103/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3104/// ...
3105static NonTrivialUnswitchCandidate
3106injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3107 DominatorTree &DT, LoopInfo &LI,
3108 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3109 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3110 BasicBlock *Preheader = L.getLoopPreheader();
3111 assert(Preheader && "Loop is not in simplified form?");
3112 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3113 "Unswitching branch of inner loop!");
3114
3115 auto Pred = Candidate.PendingInjection->Pred;
3116 auto *LHS = Candidate.PendingInjection->LHS;
3117 auto *RHS = Candidate.PendingInjection->RHS;
3118 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3119 auto *TI = cast<BranchInst>(Candidate.TI);
3120 auto *BB = Candidate.TI->getParent();
3121 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3122 : TI->getSuccessor(0);
3123 // FIXME: Remove this once limitation on successors is lifted.
3124 assert(L.contains(InLoopSucc) && "Not supported yet!");
3125 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3126 auto &Ctx = BB->getContext();
3127
3128 IRBuilder<> Builder(Preheader->getTerminator());
3129 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3130 if (LHS->getType() != RHS->getType()) {
3131 if (LHS->getType()->getIntegerBitWidth() <
3132 RHS->getType()->getIntegerBitWidth())
3133 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3134 else
3135 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3136 }
3137 // Do not use builder here: CreateICmp may simplify this into a constant and
3138 // unswitching will break. Better optimize it away later.
3139 auto *InjectedCond =
3140 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3141 Preheader->getTerminator()->getIterator());
3142
3143 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3144 BB->getParent(), InLoopSucc);
3145 Builder.SetInsertPoint(TI);
3146 auto *InvariantBr =
3147 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3148
3149 Builder.SetInsertPoint(CheckBlock);
3150 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3151 TI->getSuccessor(1));
3152 TI->eraseFromParent();
3153
3154 // Fixup phis.
3155 for (auto &I : *InLoopSucc) {
3156 auto *PN = dyn_cast<PHINode>(&I);
3157 if (!PN)
3158 break;
3159 auto *Inc = PN->getIncomingValueForBlock(BB);
3160 PN->addIncoming(Inc, CheckBlock);
3161 }
3162 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3163
3165 { DominatorTree::Insert, BB, CheckBlock },
3166 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3167 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3168 { DominatorTree::Delete, BB, OutOfLoopSucc }
3169 };
3170
3171 DT.applyUpdates(DTUpdates);
3172 if (MSSAU)
3173 MSSAU->applyUpdates(DTUpdates, DT);
3174 L.addBasicBlockToLoop(CheckBlock, LI);
3175
3176#ifndef NDEBUG
3177 DT.verify();
3178 LI.verify(DT);
3179 if (MSSAU && VerifyMemorySSA)
3180 MSSAU->getMemorySSA()->verifyMemorySSA();
3181#endif
3182
3183 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3184 // higher because we have just inserted a new block. Need to think how to
3185 // adjust the cost of injected candidates when it was first computed.
3186 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3187 << " and considering it for unswitching.");
3188 ++NumInvariantConditionsInjected;
3189 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3190 Candidate.Cost);
3191}
3192
3193/// Given chain of loop branch conditions looking like:
3194/// br (Variant < Invariant1)
3195/// br (Variant < Invariant2)
3196/// br (Variant < Invariant3)
3197/// ...
3198/// collect set of invariant conditions on which we want to unswitch, which
3199/// look like:
3200/// Invariant1 <= Invariant2
3201/// Invariant2 <= Invariant3
3202/// ...
3203/// Though they might not immediately exist in the IR, we can still inject them.
3205 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3207 const DominatorTree &DT) {
3208
3211 if (Compares.size() < 2)
3212 return false;
3214 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3215 Next != Compares.end(); ++Prev, ++Next) {
3216 Value *LHS = Next->Invariant;
3217 Value *RHS = Prev->Invariant;
3218 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3219 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3220 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3221 std::nullopt, std::move(ToInject));
3222 UnswitchCandidates.push_back(std::move(Candidate));
3223 }
3224 return true;
3225}
3226
3227/// Collect unswitch candidates by invariant conditions that are not immediately
3228/// present in the loop. However, they can be injected into the code if we
3229/// decide it's profitable.
3230/// An example of such conditions is following:
3231///
3232/// for (...) {
3233/// x = load ...
3234/// if (! x <u C1) break;
3235/// if (! x <u C2) break;
3236/// <do something>
3237/// }
3238///
3239/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3240/// C2" automatically implies "x <u C2", so we can get rid of one of
3241/// loop-variant checks in unswitched loop version.
// NOTE(review): the signature lines (orig. 3242-3243) are missing from this
// extraction; per the file's index this is
//   static bool collectUnswitchCandidatesWithInjections(...)
// returning true iff any candidates were appended.
3244 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3245 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3246 const MemorySSAUpdater *MSSAU) {
// NOTE(review): the guard preceding this early return (orig. 3247) is missing;
// presumably it tests the InjectInvariantConditions cl::opt — confirm upstream.
3248 return false;
3249
3250 if (!DT.isReachableFromEntry(L.getHeader()))
3251 return false;
3252 auto *Latch = L.getLoopLatch();
3253 // Need to have a single latch and a preheader.
3254 if (!Latch)
3255 return false;
3256 assert(L.getLoopPreheader() && "Must have a preheader!");
3257
// NOTE(review): the declaration of CandidatesULT (orig. 3258), the map used
// below keyed on the variant LHS value, is missing from this extraction.
3259 // Traverse the conditions that dominate latch (and therefore dominate each
3260 // other).
3261 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3262 DTN = DTN->getIDom()) {
3263 CmpPredicate Pred;
3264 Value *LHS = nullptr, *RHS = nullptr;
3265 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3266 auto *BB = DTN->getBlock();
3267 // Ignore inner loops.
3268 if (LI.getLoopFor(BB) != &L)
3269 continue;
  // Only conditional branches on an integer icmp are candidates.
3270 auto *Term = BB->getTerminator();
3271 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3272 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3273 continue;
3274 if (!LHS->getType()->isIntegerTy())
3275 continue;
  // Normalize to the "Variant pred Invariant, in-loop successor on true"
  // shape expected by the profitability check below.
3276 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3277 L);
3278 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3279 continue;
// NOTE(review): the condition for this 'continue' (orig. 3280) is missing;
// presumably the profile-metadata check shouldTryInjectBasingOnMetadata(...).
3281 continue;
3282 // Strip ZEXT for unsigned predicate.
3283 // TODO: once signed predicates are supported, also strip SEXT.
3284 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3285 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3286 LHS = Zext->getOperand(0);
3287 CandidatesULT[LHS].push_back(Desc);
3288 }
3289
3290 bool Found = false;
3291 for (auto &It : CandidatesULT)
// NOTE(review): the call consuming these arguments (orig. 3292) is missing;
// presumably 'Found |= insertCandidatesWithPendingInjections(' — confirm.
3293 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3294 return Found;
3295}
3296
// NOTE(review): the signature line (orig. 3297) is missing from this
// extraction; per the file's index this is
//   static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
// It performs the legality checks for non-trivial unswitching, which clones
// the loop body: the loop must be clonable, must not contain cross-block
// token values or convergent calls, must have a reducible CFG, and its exit
// blocks must be splittable.
3298 if (!L.isSafeToClone())
3299 return false;
3300 for (auto *BB : L.blocks())
3301 for (auto &I : *BB) {
  // Token values cannot be rematerialized/PHI'd across the clone boundary.
3302 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3303 return false;
3304 if (auto *CB = dyn_cast<CallBase>(&I)) {
3305 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
  // Cloning a convergent call would change its set of communicating threads.
3306 if (CB->isConvergent())
3307 return false;
3308 }
3309 }
3310
3311 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3312 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3313 // irreducible control flow into reducible control flow and introduce new
3314 // loops "out of thin air". If we ever discover important use cases for doing
3315 // this, we can add support to loop unswitch, but it is a lot of complexity
3316 // for what seems little or no real world benefit.
3317 LoopBlocksRPO RPOT(&L);
3318 RPOT.perform(&LI);
// NOTE(review): the irreducibility test itself (orig. 3319) is missing;
// presumably 'if (containsIrreducibleCFG<...>(RPOT, LI))' — confirm upstream.
3320 return false;
3321
// NOTE(review): the ExitBlocks vector declaration (orig. 3322) is missing.
3323 L.getUniqueExitBlocks(ExitBlocks);
3324 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3325 // instruction as we don't know how to split those exit blocks.
3326 // FIXME: We should teach SplitBlock to handle this and remove this
3327 // restriction.
3328 for (auto *ExitBB : ExitBlocks) {
3329 auto It = ExitBB->getFirstNonPHIIt();
// NOTE(review): the isa<CleanupPadInst/CatchSwitchInst> test (orig. 3330)
// guarding this debug message is missing from this extraction.
3331 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3332 "in exit block\n");
3333 return false;
3334 }
3335 }
3336
3337 return true;
3338}
3339
/// Pick the most profitable candidate from \p UnswitchCandidates by modeling
/// the code-size cost of duplicating the loop for each one and returning the
/// candidate with the lowest (possibly multiplier-scaled) cost.
3340static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3341 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3342 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3343 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3344 // Given that unswitching these terminators will require duplicating parts of
3345 // the loop, so we need to be able to model that cost. Compute the ephemeral
3346 // values and set up a data structure to hold per-BB costs. We cache each
3347 // block's cost so that we don't recompute this when considering different
3348 // subsets of the loop for duplication during unswitching.
// NOTE(review): the declarations of EphValues (orig. 3349) and BBCostMap
// (orig. 3351) are missing from this extraction.
3350 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3352
3353 // Compute the cost of each block, as well as the total loop cost. Also, bail
3354 // out if we see instructions which are incompatible with loop unswitching
3355 // (convergent, noduplicate, or cross-basic-block tokens).
3356 // FIXME: We might be able to safely handle some of these in non-duplicated
3357 // regions.
// NOTE(review): the CostKind selection (orig. 3358-3361) is partially missing;
// the surviving line suggests minsize functions use a code-size cost kind.
3359 L.getHeader()->getParent()->hasMinSize()
3362 InstructionCost LoopCost = 0;
3363 for (auto *BB : L.blocks()) {
3364 InstructionCost Cost = 0;
3365 for (auto &I : *BB) {
  // Ephemeral values (only feeding assumes) cost nothing at runtime.
3366 if (EphValues.count(&I))
3367 continue;
3368 Cost += TTI.getInstructionCost(&I, CostKind);
3369 }
3370 assert(Cost >= 0 && "Must not have negative costs!");
3371 LoopCost += Cost;
3372 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3373 BBCostMap[BB] = Cost;
3374 }
3375 LLVM_DEBUG(dbgs() << "  Total loop cost: " << LoopCost << "\n");
3376
3377 // Now we find the best candidate by searching for the one with the following
3378 // properties in order:
3379 //
3380 // 1) An unswitching cost below the threshold
3381 // 2) The smallest number of duplicated unswitch candidates (to avoid
3382 // creating redundant subsequent unswitching)
3383 // 3) The smallest cost after unswitching.
3384 //
3385 // We prioritize reducing fanout of unswitch candidates provided the cost
3386 // remains below the threshold because this has a multiplicative effect.
3387 //
3388 // This requires memoizing each dominator subtree to avoid redundant work.
3389 //
3390 // FIXME: Need to actually do the number of candidates part above.
// NOTE(review): the DTCostMap declaration (orig. 3391), the dominator-subtree
// memoization cache used by computeDomSubtreeCost below, is missing.
3392 // Given a terminator which might be unswitched, computes the non-duplicated
3393 // cost for that terminator.
3394 auto ComputeUnswitchedCost = [&](Instruction &TI,
3395 bool FullUnswitch) -> InstructionCost {
3396 // Unswitching selects unswitches the entire loop.
3397 if (isa<SelectInst>(TI))
3398 return LoopCost;
3399
3400 BasicBlock &BB = *TI.getParent();
// NOTE(review): the Visited set declaration (orig. 3401) is missing.
3402
3403 InstructionCost Cost = 0;
3404 for (BasicBlock *SuccBB : successors(&BB)) {
3405 // Don't count successors more than once.
3406 if (!Visited.insert(SuccBB).second)
3407 continue;
3408
3409 // If this is a partial unswitch candidate, then it must be a conditional
3410 // branch with a condition of either `or`, `and`, their corresponding
3411 // select forms or partially invariant instructions. In that case, one of
3412 // the successors is necessarily duplicated, so don't even try to remove
3413 // its cost.
3414 if (!FullUnswitch) {
3415 auto &BI = cast<BranchInst>(TI);
3416 Value *Cond = skipTrivialSelect(BI.getCondition());
3417 if (match(Cond, m_LogicalAnd())) {
3418 if (SuccBB == BI.getSuccessor(1))
3419 continue;
3420 } else if (match(Cond, m_LogicalOr())) {
3421 if (SuccBB == BI.getSuccessor(0))
3422 continue;
3423 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3424 SuccBB == BI.getSuccessor(0)) ||
3425 (!PartialIVInfo.KnownValue->isOneValue() &&
3426 SuccBB == BI.getSuccessor(1)))
3427 continue;
3428 }
3429
3430 // This successor's domtree will not need to be duplicated after
3431 // unswitching if the edge to the successor dominates it (and thus the
3432 // entire tree). This essentially means there is no other path into this
3433 // subtree and so it will end up live in only one clone of the loop.
3434 if (SuccBB->getUniquePredecessor() ||
3435 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3436 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3437 })) {
3438 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3439 assert(Cost <= LoopCost &&
3440 "Non-duplicated cost should never exceed total loop cost!");
3441 }
3442 }
3443
3444 // Now scale the cost by the number of unique successors minus one. We
3445 // subtract one because there is already at least one copy of the entire
3446 // loop. This is computing the new cost of unswitching a condition.
3447 // Note that guards always have 2 unique successors that are implicit and
3448 // will be materialized if we decide to unswitch it.
3449 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3450 assert(SuccessorsCount > 1 &&
3451 "Cannot unswitch a condition without multiple distinct successors!");
3452 return (LoopCost - Cost) * (SuccessorsCount - 1);
3453 };
3454
3455 std::optional<NonTrivialUnswitchCandidate> Best;
3456 for (auto &Candidate : UnswitchCandidates) {
3457 Instruction &TI = *Candidate.TI;
3458 ArrayRef<Value *> Invariants = Candidate.Invariants;
// NOTE(review): the BI declaration (orig. 3459, presumably
// 'BranchInst *BI = dyn_cast<BranchInst>(&TI);') is missing.
3460 bool FullUnswitch =
3461 !BI || Candidate.hasPendingInjection() ||
3462 (Invariants.size() == 1 &&
3463 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3464 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3465 // Calculate cost multiplier which is a tool to limit potentially
3466 // exponential behavior of loop-unswitch.
// NOTE(review): the guard for this scaling (orig. 3467, presumably testing
// the EnableUnswitchCostMultiplier cl::opt) is missing from this extraction.
3468 int CostMultiplier =
3469 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3470 assert(
3471 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3472 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3473 CandidateCost *= CostMultiplier;
3474 LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
3475 << " (multiplier: " << CostMultiplier << ")"
3476 << " for unswitch candidate: " << TI << "\n");
3477 } else {
3478 LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
3479 << " for unswitch candidate: " << TI << "\n");
3480 }
3481
  // Track the cheapest candidate seen so far (ties keep the earlier one).
3482 if (!Best || CandidateCost < Best->Cost) {
3483 Best = Candidate;
3484 Best->Cost = CandidateCost;
3485 }
3486 }
3487 assert(Best && "Must be!");
3488 return *Best;
3489}
3490
3491// Insert a freeze on an unswitched branch if all is true:
3492// 1. freeze-loop-unswitch-cond option is true
3493// 2. The branch may not execute in the loop pre-transformation. If a branch may
3494// not execute and could cause UB, it would always cause UB if it is hoisted outside
3495// of the loop. Insert a freeze to prevent this case.
3496// 3. The branch condition may be poison or undef
// NOTE(review): the signature line (orig. 3497) is missing from this
// extraction; per the file's index this is
//   static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
//                                  AssumptionCache &AC)
3498 AssumptionCache &AC) {
// NOTE(review): the guard before this early return (orig. 3499-3500) is
// missing; presumably it tests the FreezeLoopUnswitchCond cl::opt (item 1
// of the header comment above) — confirm upstream.
3501 return false;
3502
  // Item 2: if the terminator is guaranteed to execute on every iteration,
  // hoisting its condition cannot introduce new UB, so no freeze is needed.
3503 ICFLoopSafetyInfo SafetyInfo;
3504 SafetyInfo.computeLoopSafetyInfo(&L);
3505 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3506 return false;
3507
  // Item 3: extract the branched-on condition (branch or switch).
3508 Value *Cond;
3509 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3510 Cond = skipTrivialSelect(BI->getCondition());
3511 else
3512 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
// NOTE(review): the start of the final return (orig. 3513, presumably
// 'return !isGuaranteedNotToBeUndefOrPoison(') is missing.
3514 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3515}
3516
// NOTE(review): the signature lines (orig. 3517-3519) are missing from this
// extraction; per the file's index this is
//   static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
//       AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI,
//       ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
// Collects all non-trivial unswitch candidates, picks the cheapest one under
// the threshold, materializes any pending injected invariant, converts
// select/guard candidates into real branches, and performs the unswitch.
// Returns true iff the loop was transformed.
3520 MemorySSAUpdater *MSSAU,
3521 LPMUpdater &LoopUpdater) {
3522 // Collect all invariant conditions within this loop (as opposed to an inner
3523 // loop which would be handled when visiting that inner loop).
// NOTE(review): the UnswitchCandidates vector declaration (orig. 3524) is
// missing from this extraction.
3525 IVConditionInfo PartialIVInfo;
3526 Instruction *PartialIVCondBranch = nullptr;
3527 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3528 PartialIVCondBranch, L, LI, AA, MSSAU);
  // Injection-based candidates are skipped when loop metadata disables them.
3529 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3530 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3531 PartialIVCondBranch, L, DT, LI, AA,
3532 MSSAU);
3533 // If we didn't find any candidates, we're done.
3534 if (UnswitchCandidates.empty())
3535 return false;
3536
3537 LLVM_DEBUG(
3538 dbgs() << "Considering " << UnswitchCandidates.size()
3539 << " non-trivial loop invariant conditions for unswitching.\n");
3540
3541 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3542 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3543
3544 assert(Best.TI && "Failed to find loop unswitch candidate");
3545 assert(Best.Cost && "Failed to compute cost");
3546
  // Even the cheapest candidate may be too expensive to duplicate the loop.
3547 if (*Best.Cost >= UnswitchThreshold) {
3548 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3549 << "\n");
3550 return false;
3551 }
3552
  // Materialize the pending injected invariant condition, if any, so the
  // winning candidate refers to a real branch in the IR.
3553 bool InjectedCondition = false;
3554 if (Best.hasPendingInjection()) {
3555 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3556 InjectedCondition = true;
3557 }
3558 assert(!Best.hasPendingInjection() &&
3559 "All injections should have been done by now!");
3560
  // InstToDuplicate only applies to the partial-invariance candidate; drop it
  // if a different candidate won.
3561 if (Best.TI != PartialIVCondBranch)
3562 PartialIVInfo.InstToDuplicate.clear();
3563
3564 bool InsertFreeze;
3565 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3566 // If the best candidate is a select, turn it into a branch. Select
3567 // instructions with a poison conditional do not propagate poison, but
3568 // branching on poison causes UB. Insert a freeze on the select
3569 // conditional to prevent UB after turning the select into a branch.
3570 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3571 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3572 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3573 } else {
3574 // If the best candidate is a guard, turn it into a branch.
3575 if (isGuard(Best.TI))
3576 Best.TI =
3577 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3578 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3579 }
3580
3581 LLVM_DEBUG(dbgs() << "  Unswitching non-trivial (cost = " << Best.Cost
3582 << ") terminator: " << *Best.TI << "\n");
3583 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3584 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3585 InjectedCondition);
3586 return true;
3587}
3588
3589/// Unswitch control flow predicated on loop invariant conditions.
3590///
3591/// This first hoists all branches or switches which are trivial (IE, do not
3592/// require duplicating any part of the loop) out of the loop body. It then
3593/// looks at other loop invariant control flows and tries to unswitch those as
3594/// well by cloning the loop if the result is small enough.
3595///
3596/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3597/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3598/// valid (i.e. its use is enabled).
3599///
3600/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3601/// true, we will attempt to do non-trivial unswitching as well as trivial
3602/// unswitching.
3603///
3604/// The `postUnswitch` function will be run after unswitching is complete
3605/// with information on whether or not the provided loop remains a loop and
3606/// a list of new sibling loops created.
3607///
3608/// If `SE` is non-null, we will update that analysis based on the unswitching
3609/// done.
3610static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
// NOTE(review): two parameter lines (orig. 3611 and 3614) are missing from
// this extraction; per the file's index the full parameter list also includes
// AssumptionCache &AC, AAResults &AA, MemorySSAUpdater *MSSAU and
// ProfileSummaryInfo *PSI.
3612 TargetTransformInfo &TTI, bool Trivial,
3613 bool NonTrivial, ScalarEvolution *SE,
3615 BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
3616 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3617 "Loops must be in LCSSA form before unswitching.");
3618
3619 // Must be in loop simplified form: we need a preheader and dedicated exits.
3620 if (!L.isLoopSimplifyForm())
3621 return false;
3622
3623 // Try trivial unswitch first before loop over other basic blocks in the loop.
3624 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3625 // If we unswitched successfully we will want to clean up the loop before
3626 // processing it further so just mark it as unswitched and return.
3627 postUnswitch(L, LoopUpdater, L.getName(),
3628 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3629 /*InjectedCondition*/ false, {});
3630 return true;
3631 }
3632
3633 const Function *F = L.getHeader()->getParent();
3634
3635 // Check whether we should continue with non-trivial conditions.
3636 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3637 // unswitching for testing and debugging.
3638 // NonTrivial: Parameter that enables non-trivial unswitching for this
3639 // invocation of the transform. But this should be allowed only
3640 // for targets without branch divergence.
3641 //
3642 // FIXME: If divergence analysis becomes available to a loop
3643 // transform, we should allow unswitching for non-trivial uniform
3644 // branches even on targets that have divergence.
3645 // https://bugs.llvm.org/show_bug.cgi?id=48819
3646 bool ContinueWithNonTrivial =
3647 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3648 if (!ContinueWithNonTrivial)
3649 return false;
3650
3651 // Skip non-trivial unswitching for optsize functions.
3652 if (F->hasOptSize())
3653 return false;
3654
3655 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3656 // of the loops L is nested in, and of the loops nested in L are all cold.
3657 auto IsLoopNestCold = [&](const Loop *L) {
3658 // Check L and all of its parent loops.
3659 auto *Parent = L;
3660 while (Parent) {
3661 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3662 return false;
3663 Parent = Parent->getParentLoop();
3664 }
3665 // Next check all loops nested within L.
// NOTE(review): the Worklist declaration (orig. 3666) is missing from this
// extraction.
3667 llvm::append_range(Worklist, L->getSubLoops());
3668 while (!Worklist.empty()) {
3669 auto *CurLoop = Worklist.pop_back_val();
3670 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3671 return false;
3672 llvm::append_range(Worklist, CurLoop->getSubLoops());
3673 }
3674 return true;
3675 };
3676
3677 // Skip cold loops in cold loop nests, as unswitching them brings little
3678 // benefit but increases the code size
3679 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3680 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3681 return false;
3682 }
3683
3684 // Perform legality checks.
// NOTE(review): the legality call (orig. 3685, presumably
// 'if (!isSafeForNoNTrivialUnswitching(L, LI))') is missing.
3686 return false;
3687
3688 // For non-trivial unswitching, because it often creates new loops, we rely on
3689 // the pass manager to iterate on the loops rather than trying to immediately
3690 // reach a fixed point. There is no substantial advantage to iterating
3691 // internally, and if any of the new loops are simplified enough to contain
3692 // trivial unswitching we want to prefer those.
3693
3694 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3695 // a partial unswitch when possible below the threshold.
3696 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3697 return true;
3698
3699 // No other opportunities to unswitch.
3700 return false;
3701}
3702
// NOTE(review): the signature lines (orig. 3703-3704) are missing from this
// extraction; this is the new-pass-manager entry point
//   PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L,
//       LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
// It fetches cached profile info, runs unswitchLoop, and reports preserved
// analyses (all if nothing changed).
3705 LPMUpdater &U) {
3706 Function &F = *L.getHeader()->getParent();
3707 (void)F;
  // PSI is optional: use it only if already cached at module level.
3708 ProfileSummaryInfo *PSI = nullptr;
3709 if (auto OuterProxy =
// NOTE(review): the expression start (orig. 3710, presumably the
// FunctionAnalysisManagerLoopProxy lookup) is missing from this extraction.
3711 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3712 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3713 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3714 << "\n");
3715
  // Wrap MemorySSA in an updater only when the analysis is available, and
  // verify it before and after the transform under -verify-memoryssa.
3716 std::optional<MemorySSAUpdater> MSSAU;
3717 if (AR.MSSA) {
3718 MSSAU = MemorySSAUpdater(AR.MSSA);
3719 if (VerifyMemorySSA)
3720 AR.MSSA->verifyMemorySSA();
3721 }
3722 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3723 &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
3724 return PreservedAnalyses::all();
3725
3726 if (AR.MSSA && VerifyMemorySSA)
3727 AR.MSSA->verifyMemorySSA();
3728
3729 // Historically this pass has had issues with the dominator tree so verify it
3730 // in asserts builds.
3731 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3732
3733 auto PA = getLoopPassPreservedAnalyses();
3734 if (AR.MSSA)
3735 PA.preserve<MemorySSAAnalysis>();
3736 return PA;
3737}
3738
// NOTE(review): the signature line (orig. 3739) is missing from this
// extraction; this is
//   void SimpleLoopUnswitchPass::printPipeline(raw_ostream &OS,
//       function_ref<StringRef(StringRef)> MapClassName2PassName)
// It prints the pass name plus its <...> options for -print-pipeline-passes.
3740 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
// NOTE(review): the call receiving these arguments (orig. 3741, presumably
// the base PassInfoMixin printPipeline printing the pass name) is missing.
3742 OS, MapClassName2PassName);
3743
  // Emit the option string, e.g. "<nontrivial;trivial>" or "<no-nontrivial;...>".
3744 OS << '<';
3745 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3746 OS << (Trivial ? "" : "no-") << "trivial";
3747 OS << '>';
3748}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following replacement:
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics of programs.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:873
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
bool isUnsigned() const
Definition InstrTypes.h:938
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:163
static DebugLoc getDropped()
Definition DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
Metadata node.
Definition Metadata.h:1077
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:607
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:279
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:109
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vector, but is general enough to go beyond that when required.
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:169
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:310
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2040
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1733
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1707
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1665
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
auto cast_or_null(const Y &Val)
Definition Casting.h:720
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1160
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:149
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:846
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1632
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
OuterAnalysisManagerProxy< FunctionAnalysisManager, Loop, LoopStandardAnalysisResults & > FunctionAnalysisManagerLoopProxy
A proxy from a FunctionAnalysisManager to a Loop.
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1943
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2102
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:605
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:608
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:611
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are taken from the machine instruction.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70