LLVM 22.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
32#include "llvm/CodeGen/Passes.h"
38#include "llvm/MC/MCRegister.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
69 "aggressive-machine-cse", cl::Hidden, cl::init(false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
88 RecyclingAllocator<BumpPtrAllocator,
89 ScopedHashTableVal<MachineInstr *, unsigned>>;
90 using ScopedHTType =
91 ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;
95
96 unsigned LookAheadLimit = 0;
97 DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
98 DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
99 PREMap;
100 ScopedHTType VNT;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 const SmallSet<MCRegister, 8> &PhysRefs,
114 const PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
117 MachineInstr *MI);
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
122 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
131 MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {
141 }
142
143 bool runOnMachineFunction(MachineFunction &MF) override;
144
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.setPreservesCFG();
149 AU.addRequired<MachineDominatorTreeWrapperPass>();
150 AU.addPreserved<MachineDominatorTreeWrapperPass>();
151 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
152 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
153 }
154
155 MachineFunctionProperties getRequiredProperties() const override {
156 return MachineFunctionProperties().setIsSSA();
157 }
158};
159} // end anonymous namespace
160
161char MachineCSELegacy::ID = 0;
162
163char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
164
166 "Machine Common Subexpression Elimination", false, false)
169 "Machine Common Subexpression Elimination", false, false)
170
171/// The source register of a COPY machine instruction can be propagated to all
172/// its users, and this propagation could increase the probability of finding
173/// common subexpressions. If the COPY has only one user, the COPY itself can
174/// be removed.
175bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
177 bool Changed = false;
178 for (MachineOperand &MO : MI->all_uses()) {
179 Register Reg = MO.getReg();
180 if (!Reg.isVirtual())
181 continue;
182 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
183 MachineInstr *DefMI = MRI->getVRegDef(Reg);
184 if (!DefMI || !DefMI->isCopy())
185 continue;
186 Register SrcReg = DefMI->getOperand(1).getReg();
187 if (!SrcReg.isVirtual())
188 continue;
189 // FIXME: We should trivially coalesce subregister copies to expose CSE
190 // opportunities on instructions with truncated operands (see
191 // cse-add-with-overflow.ll). This can be done here as follows:
192 // if (SrcSubReg)
193 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
194 // SrcSubReg);
195 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
196 //
197 // The 2-addr pass has been updated to handle coalesced subregs. However,
198 // some machine-specific code still can't handle it.
199 // To handle it properly we also need a way find a constrained subregister
200 // class given a super-reg class and subreg index.
201 if (DefMI->getOperand(1).getSubReg())
202 continue;
203 if (!MRI->constrainRegAttrs(SrcReg, Reg))
204 continue;
205 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
206 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
207
208 // Propagate SrcReg of copies to MI.
209 MO.setReg(SrcReg);
210 MRI->clearKillFlags(SrcReg);
211 // Coalesce single use copies.
212 if (OnlyOneUse) {
213 // If (and only if) we've eliminated all uses of the copy, also
214 // copy-propagate to any debug-users of MI, or they'll be left using
215 // an undefined value.
216 DefMI->changeDebugValuesDefReg(SrcReg);
217
218 DefMI->eraseFromParent();
219 ++NumCoalesces;
220 }
221 Changed = true;
222 }
223
224 return Changed;
225}
226
227bool MachineCSEImpl::isPhysDefTriviallyDead(
230 unsigned LookAheadLeft = LookAheadLimit;
231 while (LookAheadLeft) {
232 // Skip over dbg_value's.
234
235 if (I == E)
236 // Reached end of block, we don't know if register is dead or not.
237 return false;
238
239 bool SeenDef = false;
240 for (const MachineOperand &MO : I->operands()) {
241 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
242 SeenDef = true;
243 if (!MO.isReg() || !MO.getReg())
244 continue;
245 if (!TRI->regsOverlap(MO.getReg(), Reg))
246 continue;
247 if (MO.isUse())
248 // Found a use!
249 return false;
250 SeenDef = true;
251 }
252 if (SeenDef)
253 // See a def of Reg (or an alias) before encountering any use, it's
254 // trivially dead.
255 return true;
256
257 --LookAheadLeft;
258 ++I;
259 }
260 return false;
261}
262
264 const MachineOperand &MO,
265 const MachineFunction &MF,
266 const TargetRegisterInfo &TRI,
267 const TargetInstrInfo &TII) {
268 // MachineRegisterInfo::isConstantPhysReg directly called by
269 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
270 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
271 // most (if not all) targets freeze reserved registers right after ISel.
272 //
273 // It does cause issues mid-GlobalISel, however, hence the additional
274 // reservedRegsFrozen check.
275 const MachineRegisterInfo &MRI = MF.getRegInfo();
276 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
277 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
278}
279
280/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
281/// physical registers (except for dead defs of physical registers). It also
282/// returns the physical register def by reference if it's the only one and the
283/// instruction does not uses a physical register.
284bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
285 const MachineBasicBlock *MBB,
286 SmallSet<MCRegister, 8> &PhysRefs,
287 PhysDefVector &PhysDefs,
288 bool &PhysUseDef) const {
289 // First, add all uses to PhysRefs.
290 for (const MachineOperand &MO : MI->all_uses()) {
291 Register Reg = MO.getReg();
292 if (!Reg)
293 continue;
294 if (Reg.isVirtual())
295 continue;
296 // Reading either caller preserved or constant physregs is ok.
297 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
298 *TII))
299 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
300 PhysRefs.insert(*AI);
301 }
302
303 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
304 // (which currently contains only uses), set the PhysUseDef flag.
305 PhysUseDef = false;
306 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
307 for (const auto &MOP : llvm::enumerate(MI->operands())) {
308 const MachineOperand &MO = MOP.value();
309 if (!MO.isReg() || !MO.isDef())
310 continue;
311 Register Reg = MO.getReg();
312 if (!Reg)
313 continue;
314 if (Reg.isVirtual())
315 continue;
316 // Check against PhysRefs even if the def is "dead".
317 if (PhysRefs.count(Reg.asMCReg()))
318 PhysUseDef = true;
319 // If the def is dead, it's ok. But the def may not marked "dead". That's
320 // common since this pass is run before livevariables. We can scan
321 // forward a few instructions and check if it is obviously dead.
322 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
323 PhysDefs.emplace_back(MOP.index(), Reg);
324 }
325
326 // Finally, add all defs to PhysRefs as well.
327 for (const auto &Def : PhysDefs)
328 for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
329 PhysRefs.insert(*AI);
330
331 return !PhysRefs.empty();
332}
333
334bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
335 const SmallSet<MCRegister, 8> &PhysRefs,
336 const PhysDefVector &PhysDefs,
337 bool &NonLocal) const {
338 // For now conservatively returns false if the common subexpression is
339 // not in the same basic block as the given instruction. The only exception
340 // is if the common subexpression is in the sole predecessor block.
341 const MachineBasicBlock *MBB = MI->getParent();
342 const MachineBasicBlock *CSMBB = CSMI->getParent();
343
344 bool CrossMBB = false;
345 if (CSMBB != MBB) {
346 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
347 return false;
348
349 for (const auto &PhysDef : PhysDefs) {
350 if (MRI->isAllocatable(PhysDef.second) || MRI->isReserved(PhysDef.second))
351 // Avoid extending live range of physical registers if they are
352 //allocatable or reserved.
353 return false;
354 }
355 CrossMBB = true;
356 }
357 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
360 unsigned LookAheadLeft = LookAheadLimit;
361 while (LookAheadLeft) {
362 // Skip over dbg_value's.
363 while (I != E && I != EE && I->isDebugInstr())
364 ++I;
365
366 if (I == EE) {
367 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
368 (void)CrossMBB;
369 CrossMBB = false;
370 NonLocal = true;
371 I = MBB->begin();
372 EE = MBB->end();
373 continue;
374 }
375
376 if (I == E)
377 return true;
378
379 for (const MachineOperand &MO : I->operands()) {
380 // RegMasks go on instructions like calls that clobber lots of physregs.
381 // Don't attempt to CSE across such an instruction.
382 if (MO.isRegMask())
383 return false;
384 if (!MO.isReg() || !MO.isDef())
385 continue;
386 Register MOReg = MO.getReg();
387 if (MOReg.isVirtual())
388 continue;
389 if (PhysRefs.count(MOReg.asMCReg()))
390 return false;
391 }
392
393 --LookAheadLeft;
394 ++I;
395 }
396
397 return false;
398}
399
400bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
401 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
402 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
403 MI->isFakeUse())
404 return false;
405
406 // Ignore copies.
407 if (MI->isCopyLike())
408 return false;
409
410 // Ignore stuff that we obviously can't move.
411 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
412 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
413 return false;
414
415 if (MI->mayLoad()) {
416 // Okay, this instruction does a load. As a refinement, we allow the target
417 // to decide whether the loaded value is actually a constant. If so, we can
418 // actually use it as a load.
419 if (!MI->isDereferenceableInvariantLoad())
420 // FIXME: we should be able to hoist loads with no other side effects if
421 // there are no other instructions which can change memory in this loop.
422 // This is a trivial form of alias analysis.
423 return false;
424 }
425
426 // Ignore stack guard loads, otherwise the register that holds CSEed value may
427 // be spilled and get loaded back with corrupted data.
428 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
429 return false;
430
431 return true;
432}
433
434/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
435/// common expression that defines Reg. CSBB is basic block where CSReg is
436/// defined.
437bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
438 MachineBasicBlock *CSBB,
439 MachineInstr *MI) {
441 return true;
442
443 // FIXME: Heuristics that works around the lack the live range splitting.
444
445 // If CSReg is used at all uses of Reg, CSE should not increase register
446 // pressure of CSReg.
447 bool MayIncreasePressure = true;
448 if (CSReg.isVirtual() && Reg.isVirtual()) {
449 MayIncreasePressure = false;
450 SmallPtrSet<MachineInstr*, 8> CSUses;
451 int NumOfUses = 0;
452 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
453 CSUses.insert(&MI);
454 // Too costly to compute if NumOfUses is very large. Conservatively assume
455 // MayIncreasePressure to avoid spending too much time here.
456 if (++NumOfUses > CSUsesThreshold) {
457 MayIncreasePressure = true;
458 break;
459 }
460 }
461 if (!MayIncreasePressure)
462 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
463 if (!CSUses.count(&MI)) {
464 MayIncreasePressure = true;
465 break;
466 }
467 }
468 }
469 if (!MayIncreasePressure) return true;
470
471 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
472 // an immediate predecessor. We don't want to increase register pressure and
473 // end up causing other computation to be spilled.
474 if (TII->isAsCheapAsAMove(*MI)) {
475 MachineBasicBlock *BB = MI->getParent();
476 if (CSBB != BB && !CSBB->isSuccessor(BB))
477 return false;
478 }
479
480 // Heuristics #2: If the expression doesn't not use a vr and the only use
481 // of the redundant computation are copies, do not cse.
482 bool HasVRegUse = false;
483 for (const MachineOperand &MO : MI->all_uses()) {
484 if (MO.getReg().isVirtual()) {
485 HasVRegUse = true;
486 break;
487 }
488 }
489 if (!HasVRegUse) {
490 bool HasNonCopyUse = false;
491 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
492 // Ignore copies.
493 if (!MI.isCopyLike()) {
494 HasNonCopyUse = true;
495 break;
496 }
497 }
498 if (!HasNonCopyUse)
499 return false;
500 }
501
502 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
503 // it unless the defined value is already used in the BB of the new use.
504 bool HasPHI = false;
505 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
506 HasPHI |= UseMI.isPHI();
507 if (UseMI.getParent() == MI->getParent())
508 return true;
509 }
510
511 return !HasPHI;
512}
513
514void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
515 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
516 ScopeType *Scope = new ScopeType(VNT);
517 ScopeMap[MBB] = Scope;
518}
519
520void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
521 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
522 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
523 assert(SI != ScopeMap.end());
524 delete SI->second;
525 ScopeMap.erase(SI);
526}
527
528bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
529 bool Changed = false;
530
532 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
533 SmallVector<Register, 2> ImplicitDefs;
534 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
535 if (!isCSECandidate(&MI))
536 continue;
537
538 bool FoundCSE = VNT.count(&MI);
539 if (!FoundCSE) {
540 // Using trivial copy propagation to find more CSE opportunities.
541 if (PerformTrivialCopyPropagation(&MI, MBB)) {
542 Changed = true;
543
544 // After coalescing MI itself may become a copy.
545 if (MI.isCopyLike())
546 continue;
547
548 // Try again to see if CSE is possible.
549 FoundCSE = VNT.count(&MI);
550 }
551 }
552
553 // Commute commutable instructions.
554 bool Commuted = false;
555 if (!FoundCSE && MI.isCommutable()) {
556 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
557 Commuted = true;
558 FoundCSE = VNT.count(NewMI);
559 if (NewMI != &MI) {
560 // New instruction. It doesn't need to be kept.
561 NewMI->eraseFromParent();
562 Changed = true;
563 } else if (!FoundCSE)
564 // MI was changed but it didn't help, commute it back!
565 (void)TII->commuteInstruction(MI);
566 }
567 }
568
569 // If the instruction defines physical registers and the values *may* be
570 // used, then it's not safe to replace it with a common subexpression.
571 // It's also not safe if the instruction uses physical registers.
572 bool CrossMBBPhysDef = false;
573 SmallSet<MCRegister, 8> PhysRefs;
574 PhysDefVector PhysDefs;
575 bool PhysUseDef = false;
576 if (FoundCSE &&
577 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
578 FoundCSE = false;
579
580 // ... Unless the CS is local or is in the sole predecessor block
581 // and it also defines the physical register which is not clobbered
582 // in between and the physical register uses were not clobbered.
583 // This can never be the case if the instruction both uses and
584 // defines the same physical register, which was detected above.
585 if (!PhysUseDef) {
586 unsigned CSVN = VNT.lookup(&MI);
587 MachineInstr *CSMI = Exps[CSVN];
588 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
589 FoundCSE = true;
590 }
591 }
592
593 if (!FoundCSE) {
594 VNT.insert(&MI, CurrVN++);
595 Exps.push_back(&MI);
596 continue;
597 }
598
599 // Found a common subexpression, eliminate it.
600 unsigned CSVN = VNT.lookup(&MI);
601 MachineInstr *CSMI = Exps[CSVN];
602 LLVM_DEBUG(dbgs() << "Examining: " << MI);
603 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
604
605 // Prevent CSE-ing non-local convergent instructions.
606 // LLVM's current definition of `isConvergent` does not necessarily prove
607 // that non-local CSE is illegal. The following check extends the definition
608 // of `isConvergent` to assume a convergent instruction is dependent not
609 // only on additional conditions, but also on fewer conditions. LLVM does
610 // not have a MachineInstr attribute which expresses this extended
611 // definition, so it's necessary to use `isConvergent` to prevent illegally
612 // CSE-ing the subset of `isConvergent` instructions which do fall into this
613 // extended definition.
614 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
615 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
616 "different BBs, avoid CSE!\n");
617 VNT.insert(&MI, CurrVN++);
618 Exps.push_back(&MI);
619 continue;
620 }
621
622 // Check if it's profitable to perform this CSE.
623 bool DoCSE = true;
624 unsigned NumDefs = MI.getNumDefs();
625
626 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
627 MachineOperand &MO = MI.getOperand(i);
628 if (!MO.isReg() || !MO.isDef())
629 continue;
630 Register OldReg = MO.getReg();
631 Register NewReg = CSMI->getOperand(i).getReg();
632
633 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
634 // we should make sure it is not dead at CSMI.
635 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
636 ImplicitDefsToUpdate.push_back(i);
637
638 // Keep track of implicit defs of CSMI and MI, to clear possibly
639 // made-redundant kill flags.
640 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
641 ImplicitDefs.push_back(OldReg);
642
643 if (OldReg == NewReg) {
644 --NumDefs;
645 continue;
646 }
647
648 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
649 "Do not CSE physical register defs!");
650
651 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
652 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
653 DoCSE = false;
654 break;
655 }
656
657 // Don't perform CSE if the result of the new instruction cannot exist
658 // within the constraints (register class, bank, or low-level type) of
659 // the old instruction.
660 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
662 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
663 DoCSE = false;
664 break;
665 }
666
667 CSEPairs.emplace_back(OldReg, NewReg);
668 --NumDefs;
669 }
670
671 // Actually perform the elimination.
672 if (DoCSE) {
673 for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
674 Register OldReg = CSEPair.first;
675 Register NewReg = CSEPair.second;
676 // OldReg may have been unused but is used now, clear the Dead flag
677 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
678 assert(Def != nullptr && "CSEd register has no unique definition?");
679 Def->clearRegisterDeads(NewReg);
680 // Replace with NewReg and clear kill flags which may be wrong now.
681 MRI->replaceRegWith(OldReg, NewReg);
682 MRI->clearKillFlags(NewReg);
683 }
684
685 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
686 // we should make sure it is not dead at CSMI.
687 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
688 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
689 for (const auto &PhysDef : PhysDefs)
690 if (!MI.getOperand(PhysDef.first).isDead())
691 CSMI->getOperand(PhysDef.first).setIsDead(false);
692
693 // Go through implicit defs of CSMI and MI, and clear the kill flags on
694 // their uses in all the instructions between CSMI and MI.
695 // We might have made some of the kill flags redundant, consider:
696 // subs ... implicit-def %nzcv <- CSMI
697 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
698 // subs ... implicit-def %nzcv <- MI, to be eliminated
699 // csinc ... implicit killed %nzcv
700 // Since we eliminated MI, and reused a register imp-def'd by CSMI
701 // (here %nzcv), that register, if it was killed before MI, should have
702 // that kill flag removed, because it's lifetime was extended.
703 if (CSMI->getParent() == MI.getParent()) {
704 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
705 for (auto ImplicitDef : ImplicitDefs)
706 if (MachineOperand *MO = II->findRegisterUseOperand(
707 ImplicitDef, TRI, /*isKill=*/true))
708 MO->setIsKill(false);
709 } else {
710 // If the instructions aren't in the same BB, bail out and clear the
711 // kill flag on all uses of the imp-def'd register.
712 for (auto ImplicitDef : ImplicitDefs)
713 MRI->clearKillFlags(ImplicitDef);
714 }
715
716 if (CrossMBBPhysDef) {
717 // Add physical register defs now coming in from a predecessor to MBB
718 // livein list.
719 while (!PhysDefs.empty()) {
720 auto LiveIn = PhysDefs.pop_back_val();
721 if (!MBB->isLiveIn(LiveIn.second))
722 MBB->addLiveIn(LiveIn.second);
723 }
724 ++NumCrossBBCSEs;
725 }
726
727 MI.eraseFromParent();
728 ++NumCSEs;
729 if (!PhysRefs.empty())
730 ++NumPhysCSEs;
731 if (Commuted)
732 ++NumCommutes;
733 Changed = true;
734 } else {
735 VNT.insert(&MI, CurrVN++);
736 Exps.push_back(&MI);
737 }
738 CSEPairs.clear();
739 ImplicitDefsToUpdate.clear();
740 ImplicitDefs.clear();
741 }
742
743 return Changed;
744}
745
746/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
747/// dominator tree node if its a leaf or all of its children are done. Walk
748/// up the dominator tree to destroy ancestors which are now done.
749void MachineCSEImpl::ExitScopeIfDone(
750 MachineDomTreeNode *Node,
751 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) {
752 if (OpenChildren[Node])
753 return;
754
755 // Pop scope.
756 ExitScope(Node->getBlock());
757
758 // Now traverse upwards to pop ancestors whose offsprings are all done.
759 while (MachineDomTreeNode *Parent = Node->getIDom()) {
760 unsigned Left = --OpenChildren[Parent];
761 if (Left != 0)
762 break;
763 ExitScope(Parent->getBlock());
764 Node = Parent;
765 }
766}
767
768bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
771 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
772
773 CurrVN = 0;
774
775 // Perform a DFS walk to determine the order of visit.
776 WorkList.push_back(Node);
777 do {
778 Node = WorkList.pop_back_val();
779 Scopes.push_back(Node);
780 OpenChildren[Node] = Node->getNumChildren();
781 append_range(WorkList, Node->children());
782 } while (!WorkList.empty());
783
784 // Now perform CSE.
785 bool Changed = false;
786 for (MachineDomTreeNode *Node : Scopes) {
787 MachineBasicBlock *MBB = Node->getBlock();
788 EnterScope(MBB);
789 Changed |= ProcessBlockCSE(MBB);
790 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
791 ExitScopeIfDone(Node, OpenChildren);
792 }
793
794 return Changed;
795}
796
797// We use stronger checks for PRE candidate rather than for CSE ones to embrace
798// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
799// to exclude instrs created by PRE that won't be CSEed later.
800bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
801 SmallSet<MCRegister, 8> &PhysRefs) {
802 if (!isCSECandidate(MI) ||
803 MI->isNotDuplicable() ||
804 MI->mayLoad() ||
806 MI->getNumDefs() != 1 ||
807 MI->getNumExplicitDefs() != 1)
808 return false;
809
810 for (const MachineOperand &MO : MI->operands()) {
811 if (MO.isReg() && !MO.getReg().isVirtual()) {
812 if (MO.isDef())
813 return false;
814 else
815 PhysRefs.insert(MO.getReg());
816 }
817 }
818
819 return true;
820}
821
822bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
823 MachineBasicBlock *MBB) {
824 bool Changed = false;
825 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
826 SmallSet<MCRegister, 8> PhysRefs;
827 if (!isPRECandidate(&MI, PhysRefs))
828 continue;
829
830 auto [It, Inserted] = PREMap.try_emplace(&MI, MBB);
831 if (Inserted)
832 continue;
833
834 auto *MBB1 = It->second;
835 assert(
836 !DT->properlyDominates(MBB, MBB1) &&
837 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
838 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
839 if (!CMBB->isLegalToHoistInto())
840 continue;
841
842 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
843 continue;
844
845 // Two instrs are partial redundant if their basic blocks are reachable
846 // from one to another but one doesn't dominate another.
847 if (CMBB != MBB1) {
848 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
849 if (BB != nullptr && BB1 != nullptr &&
850 (isPotentiallyReachable(BB1, BB) ||
851 isPotentiallyReachable(BB, BB1))) {
852 // The following check extends the definition of `isConvergent` to
853 // assume a convergent instruction is dependent not only on additional
854 // conditions, but also on fewer conditions. LLVM does not have a
855 // MachineInstr attribute which expresses this extended definition, so
856 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
857 // subset of `isConvergent` instructions which do fall into this
858 // extended definition.
859 if (MI.isConvergent() && CMBB != MBB)
860 continue;
861
862 // If this instruction uses physical registers then we can only do PRE
863 // if it's using the value that is live at the place we're hoisting to.
864 bool NonLocal;
865 PhysDefVector PhysDefs;
866 if (!PhysRefs.empty() &&
867 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
868 PhysDefs, NonLocal))
869 continue;
870
871 assert(MI.getOperand(0).isDef() &&
872 "First operand of instr with one explicit def must be this def");
873 Register VReg = MI.getOperand(0).getReg();
874 Register NewReg = MRI->cloneVirtualRegister(VReg);
875 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
876 continue;
877 MachineInstr &NewMI =
878 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
879
880 // When hoisting, make sure we don't carry the debug location of
881 // the original instruction, as that's not correct and can cause
882 // unexpected jumps when debugging optimized code.
883 auto EmptyDL = DebugLoc();
884 NewMI.setDebugLoc(EmptyDL);
885
886 NewMI.getOperand(0).setReg(NewReg);
887
888 PREMap[&MI] = CMBB;
889 ++NumPREs;
890 Changed = true;
891 }
892 }
893 }
894 return Changed;
895}
896
897// This simple PRE (partial redundancy elimination) pass doesn't actually
898// eliminate partial redundancy but transforms it to full redundancy,
899// anticipating that the next CSE step will eliminate this created redundancy.
900// If CSE doesn't eliminate this, than created instruction will remain dead
901// and eliminated later by Remove Dead Machine Instructions pass.
902bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
904
905 PREMap.clear();
906 bool Changed = false;
907 BBs.push_back(DT->getRootNode());
908 do {
909 auto Node = BBs.pop_back_val();
910 append_range(BBs, Node->children());
911
912 MachineBasicBlock *MBB = Node->getBlock();
913 Changed |= ProcessBlockPRE(DT, MBB);
914
915 } while (!BBs.empty());
916
917 return Changed;
918}
919
920bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
921 MachineBasicBlock *MBB,
922 MachineBasicBlock *MBB1) {
923 if (CandidateBB->getParent()->getFunction().hasMinSize())
924 return true;
925 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
926 assert(DT->dominates(CandidateBB, MBB1) &&
927 "CandidateBB should dominate MBB1");
928 return MBFI->getBlockFreq(CandidateBB) <=
929 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
930}
931
932void MachineCSEImpl::releaseMemory() {
933 ScopeMap.clear();
934 PREMap.clear();
935 Exps.clear();
936}
937
938bool MachineCSEImpl::run(MachineFunction &MF) {
941 MRI = &MF.getRegInfo();
942 LookAheadLimit = TII->getMachineCSELookAheadLimit();
943 bool ChangedPRE, ChangedCSE;
944 ChangedPRE = PerformSimplePRE(DT);
945 ChangedCSE = PerformCSE(DT->getRootNode());
946 releaseMemory();
947 return ChangedPRE || ChangedCSE;
948}
949
952 MFPropsModifier _(*this, MF);
953
957 MachineCSEImpl Impl(&MDT, &MBFI);
958 bool Changed = Impl.run(MF);
959 if (!Changed)
960 return PreservedAnalyses::all();
961
963 PA.preserve<MachineLoopAnalysis>();
964 PA.preserve<MachineDominatorTreeAnalysis>();
965 PA.preserve<MachineBlockFrequencyAnalysis>();
966 PA.preserveSet<CFGAnalyses>();
967 return PA;
968}
969
970bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
971 if (skipFunction(MF.getFunction()))
972 return false;
973
975 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
977 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
978 MachineCSEImpl Impl(&MDT, &MBFI);
979 return Impl.run(MF);
980}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool isAsCheapAsAMove(const MachineInstr &MI) const override
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
size_type count(const K &Key) const
Return 1 if the specified key is in the table, 0 otherwise.
void insert(const K &Key, const V &Val)
V lookup(const K &Key) const
ScopedHashTableScope< MachineInstr *, unsigned, MachineInstrExpressionTrait, AllocatorTy > ScopeTy
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2454
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
LLVM_ABI void initializeMachineCSELegacyPass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
LLVM_ABI char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
LLVM_ABI bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition CFG.cpp:282