LLVM 22.0.0git
SystemZHazardRecognizer.cpp
Go to the documentation of this file.
1//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a hazard recognizer for the SystemZ scheduler.
10//
11// This class is used by the SystemZ scheduling strategy to maintain
12// the state during scheduling, and provide cost functions for
13// scheduling candidates. This includes:
14//
15// * Decoder grouping. A decoder group can maximally hold 3 uops, and
16// instructions that always begin a new group should be scheduled when
17// the current decoder group is empty.
18// * Processor resources usage. It is beneficial to balance the use of
19// resources.
20//
21// A goal is to consider all instructions, also those outside of any
22// scheduling region. Such instructions are "advanced" past and include
23// single instructions before a scheduling region, branches etc.
24//
25// A block that has only one predecessor continues scheduling with that
26// predecessor's state (which may be updated by emitting branches).
27//
28// ===---------------------------------------------------------------------===//
29
31#include "llvm/ADT/Statistic.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "machine-scheduler"
36
37// This is the limit of processor resource usage at which the
38// scheduler should try to look for other instructions (not using the
39// critical resource).
// Hidden command-line option "procres-cost-lim" with a default of 8. A
// processor resource whose counter exceeds this value can become the
// critical resource in EmitInstruction().
40static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41 cl::desc("The OOO window for processor "
42 "resources during scheduling."),
43 cl::init(8));
44
45unsigned SystemZHazardRecognizer::
46getNumDecoderSlots(SUnit *SU) const {
47 const MCSchedClassDesc *SC = getSchedClass(SU);
48 if (!SC->isValid())
49 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50
51 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52 "Only cracked instruction can have 2 uops.");
53 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54 "Expanded instructions always group alone.");
55 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56 "Expanded instructions fill the group(s).");
57
58 return SC->NumMicroOps;
59}
60
61unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
62 unsigned Idx = CurrGroupSize;
63 if (GrpCount % 2)
64 Idx += 3;
65
66 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
67 if (Idx == 1 || Idx == 2)
68 Idx = 3;
69 else if (Idx == 4 || Idx == 5)
70 Idx = 0;
71 }
72
73 return Idx;
74}
75
77getHazardType(SUnit *SU, int Stalls) {
 // An SU that fits into the current decoder group is reported as NoHazard,
 // any other SU as Hazard. Stalls is not consulted.
78 return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard);
79}
80
// Return the recognizer to its initial state: empty decoder group, zeroed
// processor resource counters, no groups counted, and no FPd op or emitted
// instruction remembered.
82 CurrGroupSize = 0;
83 CurrGroupHas4RegOps = false;
84 clearProcResCounters();
85 GrpCount = 0;
 // UINT_MAX means that no FPd op has been seen.
86 LastFPdOpCycleIdx = UINT_MAX;
87 LastEmittedMI = nullptr;
89}
90
91bool
92SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93 const MCSchedClassDesc *SC = getSchedClass(SU);
94 if (!SC->isValid())
95 return true;
96
97 // A cracked instruction only fits into schedule if the current
98 // group is empty.
99 if (SC->BeginGroup)
100 return (CurrGroupSize == 0);
101
102 // An instruction with 4 register operands will not fit in last slot.
103 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104 "Current decoder group is already full!");
105 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
106 return false;
107
108 // Since a full group is handled immediately in EmitInstruction(),
109 // SU should fit into current group. NumSlots should be 1 or 0,
110 // since it is not a cracked or expanded instruction.
111 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112 "Expected normal instruction to fit in non-full group!");
113
114 return true;
115}
116
// Return true if at least four operands of MI's instruction descriptor are
// counted as register operands by the loop below.
117bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
119 const MCInstrDesc &MID = MI->getDesc();
120 unsigned Count = 0;
121 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
122 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI);
 // Operands without a register class are not counted.
123 if (RC == nullptr)
124 continue;
 // NOTE(review): the second half of this condition is missing from this
 // listing. It skips certain non-def operands -- confirm against the
 // complete file.
125 if (OpIdx >= MID.getNumDefs() &&
127 continue;
128 Count++;
129 }
130 return Count >= 4;
131}
132
// Close the current decoder group and start a new, empty one. Does nothing
// if the current group is empty. Advances GrpCount and decays the processor
// resource counters by the number of groups completed.
133void SystemZHazardRecognizer::nextGroup() {
134 if (CurrGroupSize == 0)
135 return;
136
137 LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
138 LLVM_DEBUG(CurGroupDbg = "";);
139
 // More than 3 slots means several whole groups were filled (one per 3
 // slots); otherwise exactly one group is completed.
140 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
141 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
142 "Current decoder group bad.");
143
144 // Reset counter for next group.
145 CurrGroupSize = 0;
146 CurrGroupHas4RegOps = false;
147
148 GrpCount += ((unsigned) NumGroups);
149
150 // Decrease counters for execution units.
 // Each counter is reduced by NumGroups but never goes below 0.
151 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
152 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
153 ? (ProcResourceCounters[i] - NumGroups)
154 : 0);
155
156 // Clear CriticalResourceIdx if it is now below the threshold.
 // NOTE(review): the line holding the threshold operand is missing from
 // this listing -- confirm against the complete file.
157 if (CriticalResourceIdx != UINT_MAX &&
158 (ProcResourceCounters[CriticalResourceIdx] <=
160 CriticalResourceIdx = UINT_MAX;
161
163}
164
165#ifndef NDEBUG // Debug output
// Print a one-line description of SU to OS: node number and opcode name,
// followed by "/"-separated notes on the processor resources it writes,
// its micro-op count, its grouping flags, and whether it is unbuffered or
// has 4 register operands.
167 OS << "SU(" << SU->NodeNum << "):";
168 OS << TII->getName(SU->getInstr()->getOpcode());
169
 // Without a valid scheduling class only the node number and opcode name
 // are printed.
170 const MCSchedClassDesc *SC = getSchedClass(SU);
171 if (!SC->isValid())
172 return;
173
 // NOTE(review): the opening line of this loop is missing from this listing.
175 PI = SchedModel->getWriteProcResBegin(SC),
176 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
177 const MCProcResourceDesc &PRD =
178 *SchedModel->getProcResource(PI->ProcResourceIdx);
179 std::string FU(PRD.Name);
180 // trim e.g. Z13_FXaUnit -> FXa
181 FU = FU.substr(FU.find('_') + 1);
182 size_t Pos = FU.find("Unit");
183 if (Pos != std::string::npos)
184 FU.resize(Pos);
185 if (FU == "LS") // LSUnit -> LSU
186 FU = "LSU";
187 OS << "/" << FU;
188
 // Only resources held for more than one cycle get a cycle count.
189 if (PI->ReleaseAtCycle> 1)
190 OS << "(" << PI->ReleaseAtCycle << "cyc)";
191 }
192
193 if (SC->NumMicroOps > 1)
194 OS << "/" << SC->NumMicroOps << "uops";
195 if (SC->BeginGroup && SC->EndGroup)
196 OS << "/GroupsAlone";
197 else if (SC->BeginGroup)
198 OS << "/BeginsGroup";
199 else if (SC->EndGroup)
200 OS << "/EndsGroup";
201 if (SU->isUnbuffered)
202 OS << "/Unbuffered";
203 if (has4RegOps(SU->getInstr()))
204 OS << "/4RegOps";
205}
206
207void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
208 dbgs() << "++ " << Msg;
209 dbgs() << ": ";
210
211 if (CurGroupDbg.empty())
212 dbgs() << " <empty>\n";
213 else {
214 dbgs() << "{ " << CurGroupDbg << " }";
215 dbgs() << " (" << CurrGroupSize << " decoder slot"
216 << (CurrGroupSize > 1 ? "s":"")
217 << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
218 << ")\n";
219 }
220}
221
// Print every non-zero processor resource counter and, if one is set, the
// name of the critical resource. Prints nothing when all counters are zero.
223 bool any = false;
224
 // First pass: find out whether there is anything to print.
225 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
226 if (ProcResourceCounters[i] > 0) {
227 any = true;
228 break;
229 }
230
231 if (!any)
232 return;
233
 // Second pass: print "Name:count" for each resource with a positive counter.
234 dbgs() << "++ | Resource counters: ";
235 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
236 if (ProcResourceCounters[i] > 0)
237 dbgs() << SchedModel->getProcResource(i)->Name
238 << ":" << ProcResourceCounters[i] << " ";
239 dbgs() << "\n";
240
 // UINT_MAX means that no critical resource is set.
241 if (CriticalResourceIdx != UINT_MAX)
242 dbgs() << "++ | Critical resource: "
243 << SchedModel->getProcResource(CriticalResourceIdx)->Name
244 << "\n";
245}
246
// Print the current decoder group, the current cycle index and, if an FPd op
// has been recorded, the cycle index of the last one.
248 dumpCurrGroup("| Current decoder group");
249 dbgs() << "++ | Current cycle index: "
250 << getCurrCycleIdx() << "\n";
 // NOTE(review): one statement between these lines is missing from this
 // listing -- confirm against the complete file.
252 if (LastFPdOpCycleIdx != UINT_MAX)
253 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
254}
255
256#endif //NDEBUG
257
258void SystemZHazardRecognizer::clearProcResCounters() {
259 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
260 CriticalResourceIdx = UINT_MAX;
261}
262
263static inline bool isBranchRetTrap(MachineInstr *MI) {
264 return (MI->isBranch() || MI->isReturn() ||
265 MI->getOpcode() == SystemZ::CondTrap);
266}
267
268// Update state with SU as the next scheduled unit.
271 const MCSchedClassDesc *SC = getSchedClass(SU);
272 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
273 dbgs() << "\n";);
274 LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
275
276 // If scheduling an SU that must begin a new decoder group, move on
277 // to next group.
278 if (!fitsIntoCurrentGroup(SU))
279 nextGroup();
280
282 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
283
284 LastEmittedMI = SU->getInstr();
285
286 // After returning from a call, we don't know much about the state.
287 if (SU->isCall) {
288 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
289 Reset();
 // Reset() cleared LastEmittedMI, so the call is recorded again.
290 LastEmittedMI = SU->getInstr();
291 return;
292 }
293
294 // Increase counter for execution unit(s).
296 PI = SchedModel->getWriteProcResBegin(SC),
297 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
298 // Don't handle FPd together with the other resources.
299 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
300 continue;
301 int &CurrCounter =
302 ProcResourceCounters[PI->ProcResourceIdx];
303 CurrCounter += PI->ReleaseAtCycle;
304 // Check if this is now the new critical resource.
 // The counter must exceed ProcResCostLim and, if a different resource is
 // already critical, also exceed that resource's counter.
305 if ((CurrCounter > ProcResCostLim) &&
306 (CriticalResourceIdx == UINT_MAX ||
307 (PI->ProcResourceIdx != CriticalResourceIdx &&
308 CurrCounter >
309 ProcResourceCounters[CriticalResourceIdx]))) {
311 dbgs() << "++ New critical resource: "
312 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
313 << "\n";);
314 CriticalResourceIdx = PI->ProcResourceIdx;
315 }
316 }
317
318 // Make note of an instruction that uses a blocking resource (FPd).
319 if (SU->isUnbuffered) {
320 LastFPdOpCycleIdx = getCurrCycleIdx(SU);
321 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
322 << "\n";);
323 }
324
325 // Insert SU into current group by increasing number of slots used
326 // in current group.
327 CurrGroupSize += getNumDecoderSlots(SU);
328 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
 // A group holding an instruction with 4 register operands is limited to
 // two slots instead of three.
329 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
330 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
331 && "SU does not fit into decoder group!");
332
333 // Check if current group is now full/ended. If so, move on to next
334 // group to be ready to evaluate more candidates.
335 if (CurrGroupSize >= GroupLim || SC->EndGroup)
336 nextGroup();
337}
338
// Return the decoder grouping cost of SU: -1 when SU fits the current group
// well, 0 when grouping does not matter, and a positive value when SU would
// leave decoder slots unused or does not fit into the last slot.
340 const MCSchedClassDesc *SC = getSchedClass(SU);
341 if (!SC->isValid())
342 return 0;
343
344 // If SU begins new group, it can either break a current group early
345 // or fit naturally if current group is empty (negative cost).
346 if (SC->BeginGroup) {
 // The cost is the number of slots left unused in the current group.
347 if (CurrGroupSize)
348 return 3 - CurrGroupSize;
349 return -1;
350 }
351
352 // Similarly, a group-ending SU may either fit well (last in group), or
353 // end the group prematurely.
354 if (SC->EndGroup) {
355 unsigned resultingGroupSize =
356 (CurrGroupSize + getNumDecoderSlots(SU));
 // The cost is the number of slots left unused after SU ends the group.
357 if (resultingGroupSize < 3)
358 return (3 - resultingGroupSize);
359 return -1;
360 }
361
362 // An instruction with 4 register operands will not fit in last slot.
363 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
364 return 1;
365
366 // Most instructions can be placed in any decoder slot.
367 return 0;
368}
369
370bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
371 assert (SU->isUnbuffered);
372 // If this is the first FPd op, it should be scheduled high.
373 if (LastFPdOpCycleIdx == UINT_MAX)
374 return true;
375 // If this is not the first PFd op, it should go into the other side
376 // of the processor to use the other FPd unit there. This should
377 // generally happen if two FPd ops are placed with 2 other
378 // instructions between them (modulo 6).
379 unsigned SUCycleIdx = getCurrCycleIdx(SU);
380 if (LastFPdOpCycleIdx > SUCycleIdx)
381 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
382 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
383}
384
386resourcesCost(SUnit *SU) {
 // Returns the cost of SU with regard to processor resource usage; 0 if SU
 // has no valid scheduling class or does not use the critical resource.
387 int Cost = 0;
388
389 const MCSchedClassDesc *SC = getSchedClass(SU);
390 if (!SC->isValid())
391 return 0;
392
393 // For a FPd op, either return min or max value as indicated by the
394 // distance to any prior FPd op.
395 if (SU->isUnbuffered)
396 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
397 // For other instructions, give a cost to the use of the critical resource.
398 else if (CriticalResourceIdx != UINT_MAX) {
 // NOTE(review): the opening line of this loop is missing from this listing.
400 PI = SchedModel->getWriteProcResBegin(SC),
401 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
402 if (PI->ProcResourceIdx == CriticalResourceIdx)
403 Cost = PI->ReleaseAtCycle;
404 }
405
406 return Cost;
407}
408
410 bool TakenBranch) {
 // Wraps MI in a temporary SUnit, feeds it to EmitInstruction() and then
 // ends the decoder group where a branch requires it.
411 // Make a temporary SUnit.
412 SUnit SU(MI, 0);
413
414 // Set interesting flags.
415 SU.isCall = MI->isCall();
416
 // Derive the resource flags from the BufferSize of each processor resource
 // written by MI: 0 marks a reserved resource, 1 an unbuffered one.
417 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
418 for (const MCWriteProcResEntry &PRE :
419 make_range(SchedModel->getWriteProcResBegin(SC),
420 SchedModel->getWriteProcResEnd(SC))) {
421 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
422 case 0:
423 SU.hasReservedResource = true;
424 break;
425 case 1:
426 SU.isUnbuffered = true;
427 break;
428 default:
429 break;
430 }
431 }
432
 // Remember the group size before emission: EmitInstruction() changes it.
433 unsigned GroupSizeBeforeEmit = CurrGroupSize;
434 EmitInstruction(&SU);
435
436 if (!TakenBranch && isBranchRetTrap(MI)) {
437 // NT Branch on second slot ends group.
438 if (GroupSizeBeforeEmit == 1)
439 nextGroup();
440 }
441
 // A taken branch always ends a non-empty group.
442 if (TakenBranch && CurrGroupSize > 0)
443 nextGroup();
444
445 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
446 "Scheduler: unhandled terminator!");
447}
448
// Take over the scheduling state of Incoming: decoder group size, processor
// resource counters, critical resource, last FPd cycle index and group count.
// NOTE(review): CurrGroupHas4RegOps and LastEmittedMI are not copied here --
// confirm that this is intended.
451 // Current decoder group
452 CurrGroupSize = Incoming->CurrGroupSize;
453 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
454
455 // Processor resources
456 ProcResourceCounters = Incoming->ProcResourceCounters;
457 CriticalResourceIdx = Incoming->CriticalResourceIdx;
458
459 // FPd
460 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
461 GrpCount = Incoming->GrpCount;
462}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
MachineInstr unsigned OpIdx
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool isBranchRetTrap(MachineInstr *MI)
static cl::opt< int > ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8))
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
bool hasReservedResource
Uses a reserved resource.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void assign(size_type NumElts, ValueParamT Elt)
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void emitInstruction(MachineInstr *MI, bool TakenBranch=false)
Wrap a non-scheduled instruction in an SU and emit it.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void copyState(SystemZHazardRecognizer *Incoming)
Copy counters from end of single predecessor.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be schedule.
void dumpSU(SUnit *SU, raw_ostream &OS) const
HazardType getHazardType(SUnit *SU, int Stalls=0) override
getHazardType - Return the hazard type of emitting this node.
void dumpCurrGroup(std::string Msg="") const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
SystemZHazardRecognizer(const SystemZInstrInfo *tii, const TargetSchedModel *SM)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MCWriteProcResEntry * ProcResIter
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
InstructionCost Cost
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
Define a kind of processor resource that will be modeled by the scheduler.
Definition MCSchedule.h:36
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68