LLVM 22.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
51static cl::opt<bool>
52ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
55static cl::opt<bool>
57 "force-hardware-loop-phi", cl::Hidden, cl::init(false),
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
60static cl::opt<bool>
61ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
62 cl::desc("Force allowance of nested hardware loops"));
63
65LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
66 cl::desc("Set the loop decrement value"));
67
69CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
70 cl::desc("Set the loop counter bitwidth"));
71
72static cl::opt<bool>
74 "force-hardware-loop-guard", cl::Hidden, cl::init(false),
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
80static void debugHWLoopFailure(const StringRef DebugMsg,
81 Instruction *I) {
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
84 dbgs() << ' ' << *I;
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
93 BasicBlock *CodeRegion = L->getHeader();
94 DebugLoc DL = L->getStartLoc();
95
96 if (I) {
97 CodeRegion = I->getParent();
98 // If there is no debug location attached to the instruction, revert back to
99 // using the loop's.
100 if (I->getDebugLoc())
101 DL = I->getDebugLoc();
102 }
103
104 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
114 ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
115 }
116
117 using TTI = TargetTransformInfo;
118
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {
125 }
126
127 bool runOnFunction(Function &F) override;
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
130 AU.addRequired<LoopInfoWrapperPass>();
131 AU.addPreserved<LoopInfoWrapperPass>();
132 AU.addRequired<DominatorTreeWrapperPass>();
133 AU.addPreserved<DominatorTreeWrapperPass>();
134 AU.addRequired<ScalarEvolutionWrapperPass>();
135 AU.addPreserved<ScalarEvolutionWrapperPass>();
136 AU.addRequired<AssumptionCacheTracker>();
137 AU.addRequired<TargetTransformInfoWrapperPass>();
138 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
139 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
140 }
141 };
142
143 class HardwareLoopsImpl {
144 public:
145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
146 DominatorTree &DT, const DataLayout &DL,
147 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
148 AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
149 HardwareLoopOptions &Opts)
150 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
151 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
152
153 bool run(Function &F);
154
155 private:
156 // Try to convert the given Loop into a hardware loop.
157 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
158
159 // Given that the target believes the loop to be profitable, try to
160 // convert it.
161 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
162
163 ScalarEvolution &SE;
164 LoopInfo &LI;
165 bool PreserveLCSSA;
166 DominatorTree &DT;
167 const DataLayout &DL;
168 const TargetTransformInfo &TTI;
169 TargetLibraryInfo *TLI = nullptr;
170 AssumptionCache &AC;
171 OptimizationRemarkEmitter *ORE;
172 HardwareLoopOptions &Opts;
173 bool MadeChange = false;
174 };
175
176 class HardwareLoop {
177 // Expand the trip count scev into a value that we can use.
178 Value *InitLoopCount();
179
180 // Insert the set_loop_iteration intrinsic.
181 Value *InsertIterationSetup(Value *LoopCountInit);
182
183 // Insert the loop_decrement intrinsic.
184 void InsertLoopDec();
185
186 // Insert the loop_decrement_reg intrinsic.
187 Instruction *InsertLoopRegDec(Value *EltsRem);
188
189 // If the target requires the counter value to be updated in the loop,
190 // insert a phi to hold the value. The intended purpose is for use by
191 // loop_decrement_reg.
192 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
193
194 // Create a new cmp, that checks the returned value of loop_decrement*,
195 // and update the exit branch to use it.
196 void UpdateBranch(Value *EltsRem);
197
198 public:
199 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
200 const DataLayout &DL,
201 OptimizationRemarkEmitter *ORE,
202 HardwareLoopOptions &Opts) :
203 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
204 ExitCount(Info.ExitCount),
205 CountType(Info.CountType),
206 ExitBranch(Info.ExitBranch),
207 LoopDecrement(Info.LoopDecrement),
208 UsePHICounter(Info.CounterInReg),
209 UseLoopGuard(Info.PerformEntryTest) { }
210
211 void Create();
212
213 private:
214 ScalarEvolution &SE;
215 const DataLayout &DL;
216 OptimizationRemarkEmitter *ORE = nullptr;
217 HardwareLoopOptions &Opts;
218 Loop *L = nullptr;
219 Module *M = nullptr;
220 const SCEV *ExitCount = nullptr;
221 Type *CountType = nullptr;
222 BranchInst *ExitBranch = nullptr;
223 Value *LoopDecrement = nullptr;
224 bool UsePHICounter = false;
225 bool UseLoopGuard = false;
226 BasicBlock *BeginBB = nullptr;
227 };
228}
229
230char HardwareLoopsLegacy::ID = 0;
231
232bool HardwareLoopsLegacy::runOnFunction(Function &F) {
233 if (skipFunction(F))
234 return false;
235
236 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
237
238 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
239 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
240 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
241 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
242 auto &DL = F.getDataLayout();
243 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
244 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
245 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
246 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
247 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
248
249 HardwareLoopOptions Opts;
258 if (LoopDecrement.getNumOccurrences())
260 if (CounterBitWidth.getNumOccurrences())
262
263 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
264 Opts);
265 return Impl.run(F);
266}
267
270 auto &LI = AM.getResult<LoopAnalysis>(F);
271 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
272 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
273 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
274 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
275 auto &AC = AM.getResult<AssumptionAnalysis>(F);
277 auto &DL = F.getDataLayout();
278
279 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
280 bool Changed = Impl.run(F);
281 if (!Changed)
282 return PreservedAnalyses::all();
283
289 return PA;
290}
291
292bool HardwareLoopsImpl::run(Function &F) {
293 LLVMContext &Ctx = F.getContext();
294 for (Loop *L : LI)
295 if (L->isOutermost())
296 TryConvertLoop(L, Ctx);
297 return MadeChange;
298}
299
300// Return true if the search should stop, which will be when an inner loop is
301// converted and the parent loop doesn't support containing a hardware loop.
302bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
303 // Process nested loops first.
304 bool AnyChanged = false;
305 for (Loop *SL : *L)
306 AnyChanged |= TryConvertLoop(SL, Ctx);
307 if (AnyChanged) {
308 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
309 ORE, L);
310 return true; // Stop search.
311 }
312
313 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
314
315 HardwareLoopInfo HWLoopInfo(L);
316 if (!HWLoopInfo.canAnalyze(LI)) {
317 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
318 "HWLoopCannotAnalyze", ORE, L);
319 return false;
320 }
321
322 if (!Opts.Force &&
323 !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
324 reportHWLoopFailure("it's not profitable to create a hardware-loop",
325 "HWLoopNotProfitable", ORE, L);
326 return false;
327 }
328
329 // Allow overriding of the counter width and loop decrement value.
330 if (Opts.Bitwidth.has_value()) {
331 HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
332 }
333
334 if (Opts.Decrement.has_value())
335 HWLoopInfo.LoopDecrement =
336 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
337
338 MadeChange |= TryConvertLoop(HWLoopInfo);
339 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
340}
341
342bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
343
344 Loop *L = HWLoopInfo.L;
345 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
346
347 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
348 Opts.getForcePhi())) {
349 // TODO: there can be many reasons a loop is not considered a
350 // candidate, so we should let isHardwareLoopCandidate fill in the
351 // reason and then report a better message here.
352 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
353 return false;
354 }
355
356 assert(
357 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
358 "Hardware Loop must have set exit info.");
359
360 BasicBlock *Preheader = L->getLoopPreheader();
361
362 // If we don't have a preheader, then insert one.
363 if (!Preheader)
364 Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
365 if (!Preheader)
366 return false;
367
368 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
369 HWLoop.Create();
370 ++NumHWLoops;
371 return true;
372}
373
374void HardwareLoop::Create() {
375 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
376
377 Value *LoopCountInit = InitLoopCount();
378 if (!LoopCountInit) {
379 reportHWLoopFailure("could not safely create a loop count expression",
380 "HWLoopNotSafe", ORE, L);
381 return;
382 }
383
384 Value *Setup = InsertIterationSetup(LoopCountInit);
385
386 if (UsePHICounter || Opts.ForcePhi) {
387 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
388 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
389 LoopDec->setOperand(0, EltsRem);
390 UpdateBranch(LoopDec);
391 } else
392 InsertLoopDec();
393
394 // Run through the basic blocks of the loop and see if any of them have dead
395 // PHIs that can be removed.
396 for (auto *I : L->blocks())
398}
399
400static bool CanGenerateTest(Loop *L, Value *Count) {
401 BasicBlock *Preheader = L->getLoopPreheader();
402 if (!Preheader->getSinglePredecessor())
403 return false;
404
405 BasicBlock *Pred = Preheader->getSinglePredecessor();
406 if (!isa<BranchInst>(Pred->getTerminator()))
407 return false;
408
409 auto *BI = cast<BranchInst>(Pred->getTerminator());
410 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
411 return false;
412
413 // Check that the icmp is checking for equality of Count and zero and that
414 // a non-zero value results in entering the loop.
415 auto ICmp = cast<ICmpInst>(BI->getCondition());
416 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
417 if (!ICmp->isEquality())
418 return false;
419
420 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
421 if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
422 return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
423 return false;
424 };
425
426 // Check if Count is a zext.
427 Value *CountBefZext =
428 isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
429
430 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
431 !IsCompareZero(ICmp, CountBefZext, 0) &&
432 !IsCompareZero(ICmp, CountBefZext, 1))
433 return false;
434
435 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
436 if (BI->getSuccessor(SuccIdx) != Preheader)
437 return false;
438
439 return true;
440}
441
442Value *HardwareLoop::InitLoopCount() {
443 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
444 // Can we replace a conditional branch with an intrinsic that sets the
445 // loop counter and tests that is not zero?
446
447 SCEVExpander SCEVE(SE, DL, "loopcnt");
448 if (!ExitCount->getType()->isPointerTy() &&
449 ExitCount->getType() != CountType)
450 ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
451
452 ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
453
454 // If we're trying to use the 'test and set' form of the intrinsic, we need
455 // to replace a conditional branch that is controlling entry to the loop. It
456 // is likely (guaranteed?) that the preheader has an unconditional branch to
457 // the loop header, so also check if it has a single predecessor.
458 if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
459 SE.getZero(ExitCount->getType()))) {
460 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
461 if (Opts.ForceGuard)
462 UseLoopGuard = true;
463 } else
464 UseLoopGuard = false;
465
466 BasicBlock *BB = L->getLoopPreheader();
467 if (UseLoopGuard && BB->getSinglePredecessor() &&
468 cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
469 BasicBlock *Predecessor = BB->getSinglePredecessor();
470 // If it's not safe to create a while loop then don't force it and create a
471 // do-while loop instead
472 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
473 UseLoopGuard = false;
474 else
475 BB = Predecessor;
476 }
477
478 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
479 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
480 << *ExitCount << "\n");
481 return nullptr;
482 }
483
484 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
485 BB->getTerminator());
486
487 // FIXME: We've expanded Count where we hope to insert the counter setting
488 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
489 // the just 'set' form and in which case the insertion block is most likely
490 // different. It means there will be instruction(s) in a block that possibly
491 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
492 // but it's doesn't appear to work in all cases.
493
494 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
495 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
496 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
497 << " - Expanded Count in " << BB->getName() << "\n"
498 << " - Will insert set counter intrinsic into: "
499 << BeginBB->getName() << "\n");
500 return Count;
501}
502
503Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
504 IRBuilder<> Builder(BeginBB->getTerminator());
505 if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
506 Builder.setIsFPConstrained(true);
507 Type *Ty = LoopCountInit->getType();
508 bool UsePhi = UsePHICounter || Opts.ForcePhi;
509 Intrinsic::ID ID = UseLoopGuard
510 ? (UsePhi ? Intrinsic::test_start_loop_iterations
511 : Intrinsic::test_set_loop_iterations)
512 : (UsePhi ? Intrinsic::start_loop_iterations
513 : Intrinsic::set_loop_iterations);
514 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);
515
516 // Use the return value of the intrinsic to control the entry of the loop.
517 if (UseLoopGuard) {
518 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
519 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
520 "Expected conditional branch");
521
522 Value *SetCount =
523 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
524 auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
525 LoopGuard->setCondition(SetCount);
526 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
527 LoopGuard->swapSuccessors();
528 }
529 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
530 << "\n");
531 if (UsePhi && UseLoopGuard)
532 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
533 return !UsePhi ? LoopCountInit : LoopSetup;
534}
535
536void HardwareLoop::InsertLoopDec() {
537 IRBuilder<> CondBuilder(ExitBranch);
538 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
539 Attribute::StrictFP))
540 CondBuilder.setIsFPConstrained(true);
541
542 Value *Ops[] = { LoopDecrement };
543 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,
544 LoopDecrement->getType(), Ops);
545 Value *OldCond = ExitBranch->getCondition();
546 ExitBranch->setCondition(NewCond);
547
548 // The false branch must exit the loop.
549 if (!L->contains(ExitBranch->getSuccessor(0)))
550 ExitBranch->swapSuccessors();
551
552 // The old condition may be dead now, and may have even created a dead PHI
553 // (the original induction variable).
555
556 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
557}
558
559Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
560 IRBuilder<> CondBuilder(ExitBranch);
561 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
562 Attribute::StrictFP))
563 CondBuilder.setIsFPConstrained(true);
564
565 Value *Ops[] = { EltsRem, LoopDecrement };
566 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,
567 {EltsRem->getType()}, Ops);
568
569 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
570 return cast<Instruction>(Call);
571}
572
573PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
574 BasicBlock *Preheader = L->getLoopPreheader();
575 BasicBlock *Header = L->getHeader();
576 BasicBlock *Latch = ExitBranch->getParent();
577 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
578 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
579 Index->addIncoming(NumElts, Preheader);
580 Index->addIncoming(EltsRem, Latch);
581 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
582 return Index;
583}
584
585void HardwareLoop::UpdateBranch(Value *EltsRem) {
586 IRBuilder<> CondBuilder(ExitBranch);
587 Value *NewCond =
588 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
589 Value *OldCond = ExitBranch->getCondition();
590 ExitBranch->setCondition(NewCond);
591
592 // The false branch must exit the loop.
593 if (!L->contains(ExitBranch->getSuccessor(0)))
594 ExitBranch->swapSuccessors();
595
596 // The old condition may be dead now, and may have even created a dead PHI
597 // (the original induction variable).
599}
600
601INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
602INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
603INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
604INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
605INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
606INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
607
608FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
#define HW_LOOPS_NAME
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
static bool CanGenerateTest(Loop *L, Value *Count)
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
@ ICMP_NE
not equal
Definition InstrTypes.h:700
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
A debug info location.
Definition DebugLoc.h:124
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This instruction compares its operands according to the predicate given to the constructor.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
int getNumOccurrences() const
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI char & LCSSAID
Definition LCSSA.cpp:526
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI void initializeHardwareLoopsLegacyPass(PassRegistry &)
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
HardwareLoopOptions & setForceNested(bool Force)
std::optional< bool > ForceGuard
std::optional< unsigned > Decrement
HardwareLoopOptions & setDecrement(unsigned Count)
HardwareLoopOptions & setForceGuard(bool Force)
HardwareLoopOptions & setForce(bool Force)
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
std::optional< unsigned > Bitwidth
HardwareLoopOptions & setForcePhi(bool Force)
std::optional< bool > ForcePhi
std::optional< bool > ForceNested