30#define DEBUG_TYPE "amdgpu-nsa-reassign"
33 "Number of NSA instructions with non-sequential address found");
35 "Number of NSA instructions changed to sequential");
38class GCNNSAReassignImpl {
41 : VRM(VM), LRM(LM), LIS(LS) {}
43 bool run(MachineFunction &MF);
46 using NSA_Status =
enum {
54 const GCNSubtarget *ST;
56 const MachineRegisterInfo *MRI;
58 const SIRegisterInfo *TRI;
70 NSA_Status CheckNSA(
const MachineInstr &
MI,
bool Fast =
false)
const;
72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
73 unsigned StartReg)
const;
75 bool canAssign(
unsigned StartReg,
unsigned NumRegs)
const;
77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals)
const;
84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {
88 bool runOnMachineFunction(MachineFunction &MF)
override;
90 StringRef getPassName()
const override {
return "GCN NSA Reassign"; };
92 void getAnalysisUsage(AnalysisUsage &AU)
const override {
111char GCNNSAReassignLegacy::
ID = 0;
115bool GCNNSAReassignImpl::tryAssignRegisters(
117 unsigned NumRegs = Intervals.size();
119 for (
unsigned N = 0;
N < NumRegs; ++
N)
120 if (VRM->hasPhys(Intervals[
N]->reg()))
121 LRM->unassign(*Intervals[
N]);
123 for (
unsigned N = 0;
N < NumRegs; ++
N)
127 for (
unsigned N = 0;
N < NumRegs; ++
N)
133bool GCNNSAReassignImpl::canAssign(
unsigned StartReg,
unsigned NumRegs)
const {
134 for (
unsigned N = 0;
N < NumRegs; ++
N) {
135 unsigned Reg = StartReg +
N;
136 if (!
MRI->isAllocatable(
Reg))
139 for (
unsigned I = 0; CSRegs[
I]; ++
I)
140 if (
TRI->isSubRegisterEq(
Reg, CSRegs[
I]) &&
148bool GCNNSAReassignImpl::scavengeRegs(
149 SmallVectorImpl<LiveInterval *> &Intervals)
const {
150 unsigned NumRegs = Intervals.
size();
152 if (NumRegs > MaxNumVGPRs)
154 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
156 for (
unsigned Reg = AMDGPU::VGPR0;
Reg <= MaxReg; ++
Reg) {
157 if (!canAssign(
Reg, NumRegs))
160 if (tryAssignRegisters(Intervals,
Reg))
167GCNNSAReassignImpl::NSA_Status
168GCNNSAReassignImpl::CheckNSA(
const MachineInstr &
MI,
bool Fast)
const {
171 return NSA_Status::NOT_NSA;
173 switch (
Info->MIMGEncoding) {
174 case AMDGPU::MIMGEncGfx10NSA:
175 case AMDGPU::MIMGEncGfx11NSA:
178 return NSA_Status::NOT_NSA;
182 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
184 unsigned VgprBase = 0;
186 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
187 const MachineOperand &
Op =
MI.getOperand(VAddr0Idx +
I);
190 return NSA_Status::FIXED;
196 return NSA_Status::FIXED;
207 if (
TRI->getRegSizeInBits(*
MRI->getRegClass(
Reg)) != 32 ||
Op.getSubReg())
208 return NSA_Status::FIXED;
216 return NSA_Status::FIXED;
218 const MachineInstr *
Def =
MRI->getUniqueVRegDef(
Reg);
220 if (Def &&
Def->isCopy() &&
Def->getOperand(1).getReg() == PhysReg)
221 return NSA_Status::FIXED;
223 for (
auto U :
MRI->use_nodbg_operands(
Reg)) {
225 return NSA_Status::FIXED;
226 const MachineInstr *UseInst =
U.getParent();
228 return NSA_Status::FIXED;
232 return NSA_Status::FIXED;
237 else if (VgprBase +
I != PhysReg)
241 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
244bool GCNNSAReassignImpl::run(MachineFunction &MF) {
252 const SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
254 MaxNumVGPRs = std::min(
257 CSRegs =
MRI->getCalleeSavedRegs();
259 using Candidate = std::pair<const MachineInstr*, bool>;
261 for (
const MachineBasicBlock &
MBB : MF) {
262 for (
const MachineInstr &
MI :
MBB) {
263 switch (CheckNSA(
MI)) {
266 case NSA_Status::CONTIGUOUS:
269 case NSA_Status::NON_CONTIGUOUS:
271 ++NumNSAInstructions;
278 for (
auto &
C : Candidates) {
282 const MachineInstr *
MI =
C.first;
283 if (CheckNSA(*
MI,
true) == NSA_Status::CONTIGUOUS) {
292 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::vaddr0);
296 SlotIndex MinInd, MaxInd;
297 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
298 const MachineOperand &
Op =
MI->getOperand(VAddr0Idx +
I);
315 MinInd =
I != 0 ? std::min(MinInd, LI->
beginIndex()) : LI->beginIndex();
316 MaxInd =
I != 0 ? std::max(MaxInd, LI->
endIndex()) : LI->endIndex();
319 if (Intervals.
empty())
323 <<
"\tOriginal allocation:\t";
329 bool Success = scavengeRegs(Intervals);
332 if (VRM->
hasPhys(Intervals.back()->reg()))
337 std::lower_bound(Candidates.begin(), &
C, MinInd,
338 [
this](
const Candidate &
C, SlotIndex
I) {
339 return LIS->getInstructionIndex(*C.first) < I;
341 for (
auto *
E = Candidates.end();
344 if (
I->second && CheckNSA(*
I->first,
true) < NSA_Status::CONTIGUOUS) {
352 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
353 if (VRM->
hasPhys(Intervals[
I]->reg()))
356 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
357 LRM->
assign(*Intervals[
I], OrigRegs[
I]);
365 dbgs() <<
"\tNew allocation:\t\t ["
376bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
377 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
378 auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
379 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
381 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
392 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
bool hasNonNSAEncoding() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasNSAEncoding() const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
Register getPreSplitReg(Register virtReg) const
returns the live interval virtReg is split from.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
bool isAssignedReg(Register virtReg) const
returns true if the specified virtual register is not mapped to a stack slot or rematerialized.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
void initializeGCNNSAReassignLegacyPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.