LLVM 22.0.0git
PPCCTRLoops.cpp
Go to the documentation of this file.
1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
42#include "llvm/Pass.h"
44#include <cassert>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-ctrloops"
49
50STATISTIC(NumCTRLoops, "Number of CTR loops generated");
51STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
52
53namespace {
54class PPCCTRLoops : public MachineFunctionPass {
55public:
56 static char ID;
57
58 PPCCTRLoops() : MachineFunctionPass(ID) {}
59
60 void getAnalysisUsage(AnalysisUsage &AU) const override {
61 AU.addRequired<MachineLoopInfoWrapperPass>();
63 }
64
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
67private:
68 const PPCInstrInfo *TII = nullptr;
69 MachineRegisterInfo *MRI = nullptr;
70
71 bool processLoop(MachineLoop *ML);
72 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
73 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
74 MachineInstr *Dec);
75 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
76};
77} // namespace
78
79char PPCCTRLoops::ID = 0;
80
81INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
82 false, false)
84INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
86
87FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
88
89bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
90 bool Changed = false;
91
92 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
93 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
94 MRI = &MF.getRegInfo();
95
96 for (auto *ML : MLI) {
97 if (ML->isOutermost())
98 Changed |= processLoop(ML);
99 }
100
101#ifndef NDEBUG
102 for (const MachineBasicBlock &BB : MF) {
103 for (const MachineInstr &I : BB)
104 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
105 I.getOpcode() != PPC::DecreaseCTR8loop) &&
106 "CTR loop pseudo is not expanded!");
107 }
108#endif
109
110 return Changed;
111}
112
113bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
114 if (!CheckReads) {
115 // If we are only checking for defs, that is we are going to find
116 // definitions before MTCTRloop, for this case:
117 // CTR defination inside the callee of a call instruction will not impact
118 // the defination of MTCTRloop, so we can use definesRegister() for the
119 // check, no need to check the regmask.
120 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
121 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
122 }
123
124 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
125 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
126 return true;
127
128 if (MI->getDesc().isCall())
129 return true;
130
131 // We define the CTR in the loop preheader, so if there is any CTR reader in
132 // the loop, we also can not use CTR loop form.
133 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
134 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
135 return true;
136
137 return false;
138}
139
140bool PPCCTRLoops::processLoop(MachineLoop *ML) {
141 bool Changed = false;
142
143 // Align with HardwareLoop pass, process inner loops first.
144 for (MachineLoop *I : *ML)
145 Changed |= processLoop(I);
146
147 // If any inner loop is changed, outter loop must be without hardware loop
148 // intrinsics.
149 if (Changed)
150 return true;
151
152 auto IsLoopStart = [](MachineInstr &MI) {
153 return MI.getOpcode() == PPC::MTCTRloop ||
154 MI.getOpcode() == PPC::MTCTR8loop;
155 };
156
157 auto SearchForStart =
158 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
159 for (auto &MI : *MBB) {
160 if (IsLoopStart(MI))
161 return &MI;
162 }
163 return nullptr;
164 };
165
166 MachineInstr *Start = nullptr;
167 MachineInstr *Dec = nullptr;
168 bool InvalidCTRLoop = false;
169
170 MachineBasicBlock *Preheader = ML->getLoopPreheader();
171 // If there is no preheader for this loop, there must be no MTCTRloop
172 // either.
173 if (!Preheader)
174 return false;
175
176 Start = SearchForStart(Preheader);
177 // This is not a CTR loop candidate.
178 if (!Start)
179 return false;
180
181 // If CTR is live to the preheader, we can not redefine the CTR register.
182 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
183 InvalidCTRLoop = true;
184
185 // Make sure there is also no CTR clobber in the block preheader between the
186 // begin and MTCTR.
188 std::next(Start->getReverseIterator());
189 I != Preheader->instr_rend(); ++I)
190 // Only check the definitions of CTR. If there is non-dead definition for
191 // the CTR, we conservatively don't generate a CTR loop.
192 if (isCTRClobber(&*I, /* CheckReads */ false)) {
193 InvalidCTRLoop = true;
194 break;
195 }
196
197 // Make sure there is also no CTR clobber/user in the block preheader between
198 // MTCTR and the end.
199 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
200 I != Preheader->instr_end(); ++I)
201 if (isCTRClobber(&*I, /* CheckReads */ true)) {
202 InvalidCTRLoop = true;
203 break;
204 }
205
206 // Find the CTR loop components and decide whether or not to fall back to a
207 // normal loop.
208 for (auto *MBB : reverse(ML->getBlocks())) {
209 for (auto &MI : *MBB) {
210 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
211 MI.getOpcode() == PPC::DecreaseCTR8loop)
212 Dec = &MI;
213 else if (!InvalidCTRLoop)
214 // If any instruction clobber CTR, then we can not generate a CTR loop.
215 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
216 }
217 if (Dec && InvalidCTRLoop)
218 break;
219 }
220
221 assert(Dec && "CTR loop is not complete!");
222
223 if (InvalidCTRLoop) {
224 expandNormalLoops(ML, Start, Dec);
225 ++NumNormalLoops;
226 }
227 else {
228 expandCTRLoops(ML, Start, Dec);
229 ++NumCTRLoops;
230 }
231 return true;
232}
233
234void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
235 MachineInstr *Dec) {
236 bool Is64Bit =
237 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
238
239 MachineBasicBlock *Preheader = Start->getParent();
240 MachineBasicBlock *Exiting = Dec->getParent();
241 assert((Preheader && Exiting) &&
242 "Preheader and exiting should exist for CTR loop!");
243
244 assert(Dec->getOperand(1).getImm() == 1 &&
245 "Loop decrement stride must be 1");
246
247 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
248 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
249
250 Register PHIDef =
251 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
252 : &PPC::GPRC_and_GPRC_NOR0RegClass);
253
254 Start->getParent()->getParent()->getProperties().resetNoPHIs();
255
256 // Generate "PHI" in the header block.
257 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
258 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
259 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
260
261 Register ADDIDef =
262 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
263 : &PPC::GPRC_and_GPRC_NOR0RegClass);
264 // Generate "addi -1" in the exiting block.
265 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
266 .addReg(PHIDef)
267 .addImm(-1);
268
269 // Add other inputs for the PHI node.
270 if (ML->isLoopLatch(Exiting)) {
271 // There must be only two predecessors for the loop header, one is the
272 // Preheader and the other one is loop latch Exiting. In hardware loop
273 // insertion pass, the block containing DecreaseCTRloop must dominate all
274 // loop latches. So there must be only one latch.
275 assert(ML->getHeader()->pred_size() == 2 &&
276 "Loop header predecessor is not right!");
277 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
278 } else {
279 // If the block containing DecreaseCTRloop is not a loop latch, we can use
280 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
281 // insertion pass, the block containing DecreaseCTRloop must dominate all
282 // loop latches.
283 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
284 if (ML->contains(P)) {
285 assert(ML->isLoopLatch(P) &&
286 "Loop's header in-loop predecessor is not loop latch!");
287 PHIMIB.addReg(ADDIDef).addMBB(P);
288 } else
289 assert(P == Preheader &&
290 "CTR loop should not be generated for irreducible loop!");
291 }
292 }
293
294 // Generate the compare in the exiting block.
295 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
296 auto CMPMIB =
297 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
298 .addReg(ADDIDef)
299 .addImm(0);
300
301 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
302 Dec->getOperand(0).getReg())
303 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
304
305 // Remove the pseudo instructions.
306 Start->eraseFromParent();
307 Dec->eraseFromParent();
308}
309
310void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
311 MachineInstr *Dec) {
312 bool Is64Bit =
313 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
314
315 MachineBasicBlock *Preheader = Start->getParent();
316 MachineBasicBlock *Exiting = Dec->getParent();
317
318 (void)Preheader;
319 assert((Preheader && Exiting) &&
320 "Preheader and exiting should exist for CTR loop!");
321
322 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
323
324 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
325 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
326 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
327 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
328 "There should be only one user for loop decrement pseudo!");
329
330 unsigned Opcode = 0;
331 switch (BrInstr->getOpcode()) {
332 case PPC::BC:
333 Opcode = BDNZOpcode;
334 (void) ML;
335 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
336 "Invalid ctr loop!");
337 break;
338 case PPC::BCn:
339 Opcode = BDZOpcode;
340 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
341 "Invalid ctr loop!");
342 break;
343 default:
344 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
345 }
346
347 // Generate "bdnz/bdz" in the exiting block just before the terminator.
348 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
349 .addMBB(BrInstr->getOperand(1).getMBB());
350
351 // Remove the pseudo instructions.
352 BrInstr->eraseFromParent();
353 Dec->eraseFromParent();
354}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCCTRLoopsPass()
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400