LLVM 22.0.0git
GCNPreRAOptimizations.cpp
Go to the documentation of this file.
1//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass combines split register tuple initialization into a single pseudo:
11///
12/// undef %0.sub1:sreg_64 = S_MOV_B32 1
13/// %0.sub0:sreg_64 = S_MOV_B32 2
14/// =>
15/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16///
17/// This is to allow rematerialization of a value instead of spilling. It is
18/// supposed to be done after register coalescer to allow it to do its job and
19/// before actual register allocation to allow rematerialization.
20///
21/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
22/// although the same shall be possible with other register classes and
23/// instructions if necessary.
24///
25/// This pass also adds register allocation hints to COPY.
26/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
27/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
28/// and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16 bits of
29/// the VGPR_32, the COPY can be completely eliminated.
30///
31//===----------------------------------------------------------------------===//
32
34#include "AMDGPU.h"
35#include "GCNSubtarget.h"
37#include "SIRegisterInfo.h"
41
42using namespace llvm;
43
44#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
45
46namespace {
47
48class GCNPreRAOptimizationsImpl {
49private:
50 const SIInstrInfo *TII;
51 const SIRegisterInfo *TRI;
53 LiveIntervals *LIS;
54
55 bool processReg(Register Reg);
56
57public:
58 GCNPreRAOptimizationsImpl(LiveIntervals *LS) : LIS(LS) {}
59 bool run(MachineFunction &MF);
60};
61
62class GCNPreRAOptimizationsLegacy : public MachineFunctionPass {
63public:
64 static char ID;
65
66 GCNPreRAOptimizationsLegacy() : MachineFunctionPass(ID) {
68 }
69
70 bool runOnMachineFunction(MachineFunction &MF) override;
71
72 StringRef getPassName() const override {
73 return "AMDGPU Pre-RA optimizations";
74 }
75
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesAll();
80 }
81};
82} // End anonymous namespace.
83
84INITIALIZE_PASS_BEGIN(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
85 "AMDGPU Pre-RA optimizations", false, false)
87INITIALIZE_PASS_END(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
88 "Pre-RA optimizations", false, false)
89
90char GCNPreRAOptimizationsLegacy::ID = 0;
91
92char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizationsLegacy::ID;
93
95 return new GCNPreRAOptimizationsLegacy();
96}
97
98bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
99 MachineInstr *Def0 = nullptr;
100 MachineInstr *Def1 = nullptr;
101 uint64_t Init = 0;
102 bool Changed = false;
103 SmallSet<Register, 32> ModifiedRegs;
104 bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
105
106 for (MachineInstr &I : MRI->def_instructions(Reg)) {
107 switch (I.getOpcode()) {
108 default:
109 return false;
110 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
111 break;
112 case AMDGPU::COPY: {
113 // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
114 // intermdiate temporary VGPR register. Try to find the defining
115 // accvgpr_write to avoid temporary registers.
116
117 if (!IsAGPRDst)
118 return false;
119
120 Register SrcReg = I.getOperand(1).getReg();
121
122 if (!SrcReg.isVirtual())
123 break;
124
125 // Check if source of copy is from another AGPR.
126 bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
127 if (!IsAGPRSrc)
128 break;
129
130 // def_instructions() does not look at subregs so it may give us a
131 // different instruction that defines the same vreg but different subreg
132 // so we have to manually check subreg.
133 Register SrcSubReg = I.getOperand(1).getSubReg();
134 for (auto &Def : MRI->def_instructions(SrcReg)) {
135 if (SrcSubReg != Def.getOperand(0).getSubReg())
136 continue;
137
138 if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
139 MachineOperand DefSrcMO = Def.getOperand(1);
140
141 // Immediates are not an issue and can be propagated in
142 // postrapseudos pass. Only handle cases where defining
143 // accvgpr_write source is a vreg.
144 if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
145 // Propagate source reg of accvgpr write to this copy instruction
146 I.getOperand(1).setReg(DefSrcMO.getReg());
147 I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
148
149 // Reg uses were changed, collect unique set of registers to update
150 // live intervals at the end.
151 ModifiedRegs.insert(DefSrcMO.getReg());
152 ModifiedRegs.insert(SrcReg);
153
154 Changed = true;
155 }
156
157 // Found the defining accvgpr_write, stop looking any further.
158 break;
159 }
160 }
161 break;
162 }
163 case AMDGPU::S_MOV_B32:
164 if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
165 I.getNumOperands() != 2)
166 return false;
167
168 switch (I.getOperand(0).getSubReg()) {
169 default:
170 return false;
171 case AMDGPU::sub0:
172 if (Def0)
173 return false;
174 Def0 = &I;
175 Init |= Lo_32(I.getOperand(1).getImm());
176 break;
177 case AMDGPU::sub1:
178 if (Def1)
179 return false;
180 Def1 = &I;
181 Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
182 break;
183 }
184 break;
185 }
186 }
187
188 // For AGPR reg, check if live intervals need to be updated.
189 if (IsAGPRDst) {
190 if (Changed) {
191 for (Register RegToUpdate : ModifiedRegs) {
192 LIS->removeInterval(RegToUpdate);
193 LIS->createAndComputeVirtRegInterval(RegToUpdate);
194 }
195 }
196
197 return Changed;
198 }
199
200 // For SGPR reg, check if we can combine instructions.
201 if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
202 return Changed;
203
204 LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
205 << " =>\n");
206
208 LIS->getInstructionIndex(*Def0)))
209 std::swap(Def0, Def1);
210
211 LIS->RemoveMachineInstrFromMaps(*Def0);
212 LIS->RemoveMachineInstrFromMaps(*Def1);
213 auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
214 TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
215 .addImm(Init);
216
217 Def0->eraseFromParent();
218 Def1->eraseFromParent();
219 LIS->InsertMachineInstrInMaps(*NewI);
220 LIS->removeInterval(Reg);
222
223 LLVM_DEBUG(dbgs() << " " << *NewI);
224
225 return true;
226}
227
228bool GCNPreRAOptimizationsLegacy::runOnMachineFunction(MachineFunction &MF) {
229 if (skipFunction(MF.getFunction()))
230 return false;
231 LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
232 return GCNPreRAOptimizationsImpl(LIS).run(MF);
233}
234
235PreservedAnalyses
239 GCNPreRAOptimizationsImpl(LIS).run(MF);
240 return PreservedAnalyses::all();
241}
242
243bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
244 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
245 TII = ST.getInstrInfo();
246 MRI = &MF.getRegInfo();
247 TRI = ST.getRegisterInfo();
248
249 bool Changed = false;
250
251 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
253 if (!LIS->hasInterval(Reg))
254 continue;
255 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
256 if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
257 (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
258 continue;
259
260 Changed |= processReg(Reg);
261 }
262
263 if (!ST.useRealTrue16Insts())
264 return Changed;
265
266 // Add RA hints to improve True16 COPY elimination.
267 for (const MachineBasicBlock &MBB : MF) {
268 for (const MachineInstr &MI : MBB) {
269 if (MI.getOpcode() != AMDGPU::COPY)
270 continue;
271 Register Dst = MI.getOperand(0).getReg();
272 Register Src = MI.getOperand(1).getReg();
273 if (Dst.isVirtual() &&
274 MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
275 Src.isPhysical() &&
276 TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass)
277 MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16));
278 if (Src.isVirtual() &&
279 MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass &&
280 Dst.isPhysical() &&
281 TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass)
282 MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16));
283 if (!Dst.isVirtual() || !Src.isVirtual())
284 continue;
285 if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_32RegClass &&
286 MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass) {
287 MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src);
288 MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst);
289 }
290 if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
291 MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass)
292 MRI->setRegAllocationHint(Dst, AMDGPURI::Size16, Src);
293 }
294 }
295
296 return Changed;
297}
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Interface definition for SIRegisterInfo.
#define LLVM_DEBUG(...)
Definition Debug.h:114
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:67
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const MCRegisterClass * MC
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
char & GCNPreRAOptimizationsID
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void initializeGCNPreRAOptimizationsLegacyPass(PassRegistry &)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
FunctionPass * createGCNPreRAOptimizationsLegacyPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853