#define DEBUG_TYPE "machine-scheduler"
  VLIW5 = !ST.hasCaymanISA();
  CurInstKind = IDOther;
  OccupiedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
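  // OccupiedSlotsMask holds one bit per VLIW slot: bits 0-3 are the X/Y/Z/W
  // vector slots and bit 4 (value 16) is the Trans slot used on VLIW5 parts,
  // so initializing it to 31 marks every slot occupied and forces the first
  // call to pickAlu() to start a fresh instruction group.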
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
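  // Transfers every unit from QSrc to the back of QDst (llvm::append_range)
  // and leaves QSrc empty; used to promote Pending queues to Available.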
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
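// For example, a clause needing roughly 10 GPRs per wavefront can keep
// 248 / 10 = 24 wavefronts in flight under this register-file budget.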
  SUnit *SU = nullptr;
  NextInstKind = IDOther;
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());
  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
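    // Heuristic from the AMD Accelerated Parallel Processing OpenCL guide:
    // roughly 500 cycles of TEX latency are hidden by (AluFetchRatio * 8)
    // ALU cycles, so about 62.5 / Ratio wavefronts are needed to cover it,
    // which is what the 62.5f below encodes.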
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU unit first.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (CurEmitted >= InstKindLimit[IDAlu])
      CurEmitted = 0;
    NextInstKind = IDAlu;
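  // If no ALU work could be picked, fall back to a fetch unit and, failing
  // that, to the IDOther queue below.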
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  LLVM_DEBUG(if (SU) {
    dbgs() << " ** Pick node **\n";
    DAG->dumpNode(*SU);
  } else {
    dbgs() << "NO NODE \n";
    for (const SUnit &S : DAG->SUnits)
      if (!S.isScheduled)
        DAG->dumpNode(S);
  });
  if (NextInstKind != CurInstKind) {
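    // Switching to a non-ALU kind marks every VLIW slot occupied so that the
    // next ALU clause begins with a freshly prepared instruction group.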
    if (NextInstKind != IDAlu)
      OccupiedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }
  if (CurInstKind == IDAlu) {
    ++AluInstCount;
    switch (getAluKind(SU)) {
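      // ALU_LITERAL_X operands take an extra instruction word in the clause,
      // so each literal read also counts toward CurEmitted.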
      if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
        ++CurEmitted;
  LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  } else
    FetchInstCount++;
static bool isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != R600::COPY)
    return false;

  return !MI->getOperand(1).getReg().isVirtual();
}
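// Copies out of physical registers are queued separately (PhysicalRegCopy in
// pickNode) so they can still be emitted when the regular ALU queues have
// nothing schedulable.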
    PhysicalRegCopy.push_back(SU);
  int IK = getInstKind(SU);
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
bool R600SchedStrategy::regBelongsToClass(Register Reg,
                                          const TargetRegisterClass *RC) const {
  if (!Reg.isVirtual())
    return RC->contains(Reg);
  return MRI->getRegClass(Reg) == RC;
}
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  if (TII->isTransOnly(*MI))
    return AluTrans;

  switch (MI->getOpcode()) {
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case R600::COPY:
    if (MI->getOperand(1).isUndef()) {
      // An undef COPY source will become a KILL; keep it out of slot
      // accounting.
      return AluDiscarded;
    }
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == R600::GROUP_BARRIER) {
    return AluT_XYZW;
  }

  if (TII->isLDSInstr(MI->getOpcode())) {
    return AluT_X;
  }
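  // Otherwise, see whether the destination is already pinned to a channel,
  // either through a sub-register index or through its register class.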
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  Register DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
    return AluT_XYZW;

  if (TII->readsLDSSrcReg(*MI))
    return AluT_XYZW;

  return AluAny;
}
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case R600::CONST_COPY:
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
    return IDAlu;
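// PopInst scans the queue from the back and returns the first unit whose
// instruction still fits the constant-read limits of the instruction group
// being assembled; when AnyALU is set (the Trans-slot fill path), vector-only
// instructions are skipped since they cannot go to the Trans slot.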
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    }
    InstructionsGroupCandidate.pop_back();
  }
  return nullptr;
}
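// LoadAlu bins every pending ALU unit by its AluKind so pickAlu can fill each
// VLIW slot from the matching bucket.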
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (SUnit *SU : QSrc) {
    AluKind AK = getAluKind(SU);
    AvailableAlus[AK].push_back(SU);
  }
  QSrc.clear();
}
void R600SchedStrategy::PrepareNextSlot() {
  assert(OccupiedSlotsMask && "Slot wasn't filled");
  OccupiedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
  if (DstIndex == -1) {
    return;
  }
  Register DestReg = MI->getOperand(DstIndex).getReg();
  // Do not constrain the register class when the destination is also read by
  // the instruction.
  for (const MachineOperand &MO : MI->all_uses())
    if (MO.getReg() == DestReg)
      return;

  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
    break;
  }
}
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}
unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}
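// pickAlu assembles one VLIW instruction group at a time: bits 0-3 of
// OccupiedSlotsMask track the X/Y/Z/W slots and bit 4 (16) the Trans slot,
// hence the masks 15 (vector slots only) and 31 (all five slots) below.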
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupiedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      // Flush discarded copies; register allocation will delete them anyway.
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // An instruction that needs the whole X/Y/Z/W group fills the bundle.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupiedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccupied = OccupiedSlotsMask & 16;
    if (!TransSlotOccupied && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupiedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupiedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupiedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupiedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}
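// pickOther serves the fetch and "other" queues: when the Available list for
// the requested kind runs dry it is refilled from the corresponding Pending
// list.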
SUnit* R600SchedStrategy::pickOther(int QID) {
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty())
    MoveUnits(Pending[QID], AQ);