LLVM 22.0.0git
AArch64StackTaggingPreRA.cpp
Go to the documentation of this file.
1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64.h"
10#include "AArch64InstrInfo.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/Passes.h"
25#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31
33
35 "stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe),
37 "Unconditionally apply unchecked-ld-st optimization (even for large "
38 "stack frames, or in the presence of variable sized allocas)."),
40 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
42 UncheckedSafe, "safe",
43 "apply unchecked-ld-st when the target is definitely within range"),
44 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
45
46static cl::opt<bool>
47 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
48 cl::desc("Apply first slot optimization for stack tagging "
49 "(eliminate ADDG Rt, Rn, 0, 0)."));
50
51namespace {
52
53class AArch64StackTaggingPreRA : public MachineFunctionPass {
59 const AArch64InstrInfo *TII;
60
62
63public:
64 static char ID;
65 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {}
66
67 bool mayUseUncheckedLoadStore();
68 void uncheckUsesOf(unsigned TaggedReg, int FI);
69 void uncheckLoadsAndStores();
70 std::optional<int> findFirstSlotCandidate();
71
72 bool runOnMachineFunction(MachineFunction &Func) override;
73 StringRef getPassName() const override {
74 return "AArch64 Stack Tagging PreRA";
75 }
76
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesCFG();
80 }
81};
82} // end anonymous namespace
83
84char AArch64StackTaggingPreRA::ID = 0;
85
86INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
87 "AArch64 Stack Tagging PreRA Pass", false, false)
88INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
89 "AArch64 Stack Tagging PreRA Pass", false, false)
90
92 return new AArch64StackTaggingPreRA();
93}
94
95static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
96 switch (Opcode) {
97 case AArch64::LDRBBui:
98 case AArch64::LDRHHui:
99 case AArch64::LDRWui:
100 case AArch64::LDRXui:
101
102 case AArch64::LDRBui:
103 case AArch64::LDRHui:
104 case AArch64::LDRSui:
105 case AArch64::LDRDui:
106 case AArch64::LDRQui:
107
108 case AArch64::LDRSHWui:
109 case AArch64::LDRSHXui:
110
111 case AArch64::LDRSBWui:
112 case AArch64::LDRSBXui:
113
114 case AArch64::LDRSWui:
115
116 case AArch64::STRBBui:
117 case AArch64::STRHHui:
118 case AArch64::STRWui:
119 case AArch64::STRXui:
120
121 case AArch64::STRBui:
122 case AArch64::STRHui:
123 case AArch64::STRSui:
124 case AArch64::STRDui:
125 case AArch64::STRQui:
126
127 case AArch64::LDPWi:
128 case AArch64::LDPXi:
129 case AArch64::LDPSi:
130 case AArch64::LDPDi:
131 case AArch64::LDPQi:
132
133 case AArch64::LDPSWi:
134
135 case AArch64::STPWi:
136 case AArch64::STPXi:
137 case AArch64::STPSi:
138 case AArch64::STPDi:
139 case AArch64::STPQi:
140 return true;
141 default:
142 return false;
143 }
144}
145
146bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
148 return false;
150 return true;
151
152 // This estimate can be improved if we had harder guarantees about stack frame
153 // layout. With LocalStackAllocation we can estimate SP offset to any
154 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
155 // objects ahead of non-tagged ones, but that's not always desirable.
156 //
157 // Underestimating SP offset here may require the use of LDG to materialize
158 // the tagged address of the stack slot, along with a scratch register
159 // allocation (post-regalloc!).
160 //
161 // For now we do the safe thing here and require that the entire stack frame
162 // is within range of the shortest of the unchecked instructions.
163 unsigned FrameSize = 0;
164 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
165 FrameSize += MFI->getObjectSize(i);
166 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
167 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
168}
169
170void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
171 for (MachineInstr &UseI :
172 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
173 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
174 // FI operand is always the one before the immediate offset.
175 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
176 if (UseI.getOperand(OpIdx).isReg() &&
177 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
178 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
179 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
180 }
181 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
182 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
183 }
184 }
185}
186
187void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
188 for (auto *I : ReTags) {
189 Register TaggedReg = I->getOperand(0).getReg();
190 int FI = I->getOperand(1).getIndex();
191 uncheckUsesOf(TaggedReg, FI);
192 }
193}
194
195namespace {
196struct SlotWithTag {
197 int FI;
198 int Tag;
199 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
200 explicit SlotWithTag(const MachineInstr &MI)
201 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
202 bool operator==(const SlotWithTag &Other) const {
203 return FI == Other.FI && Tag == Other.Tag;
204 }
205};
206} // namespace
207
208namespace llvm {
209template <> struct DenseMapInfo<SlotWithTag> {
210 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
211 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
212 static unsigned getHashValue(const SlotWithTag &V) {
215 }
216 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
217 return A == B;
218 }
219};
220} // namespace llvm
221
222static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
223 return MFI->getUseLocalStackAllocationBlock() &&
224 MFI->isObjectPreAllocated(FI);
225}
226
227// Pin one of the tagged slots to offset 0 from the tagged base pointer.
228// This would make its address available in a virtual register (IRG's def), as
229// opposed to requiring an ADDG instruction to materialize. This effectively
230// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
231// live almost everywhere anyway), and therefore needs to happen before
232// regalloc.
233std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
234 // Find the best (FI, Tag) pair to pin to offset 0.
235 // Looking at the possible uses of a tagged address, the advantage of pinning
236 // is:
237 // - COPY to physical register.
238 // Does not matter, this would trade a MOV instruction for an ADDG.
239 // - ST*G matter, but those mostly appear near the function prologue where all
240 // the tagged addresses need to be materialized anyway; also, counting ST*G
241 // uses would overweight large allocas that require more than one ST*G
242 // instruction.
243 // - Load/Store instructions in the address operand do not require a tagged
244 // pointer, so they also do not benefit. These operands have already been
245 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
246 // instructions count.
247 // - Any other instruction may benefit from being pinned to offset 0.
248 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
249 if (!ClFirstSlot)
250 return std::nullopt;
251
252 DenseMap<SlotWithTag, int> RetagScore;
253 SlotWithTag MaxScoreST{-1, -1};
254 int MaxScore = -1;
255 for (auto *I : ReTags) {
256 SlotWithTag ST{*I};
257 if (isSlotPreAllocated(MFI, ST.FI))
258 continue;
259
260 Register RetagReg = I->getOperand(0).getReg();
261 if (!RetagReg.isVirtual())
262 continue;
263
264 int Score = 0;
266 WorkList.push_back(RetagReg);
267
268 while (!WorkList.empty()) {
269 Register UseReg = WorkList.pop_back_val();
270 for (auto &UseI : MRI->use_instructions(UseReg)) {
271 unsigned Opcode = UseI.getOpcode();
272 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
273 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
274 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
275 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
276 Opcode == AArch64::STZGloop_wback)
277 continue;
278 if (UseI.isCopy()) {
279 Register DstReg = UseI.getOperand(0).getReg();
280 if (DstReg.isVirtual())
281 WorkList.push_back(DstReg);
282 continue;
283 }
284 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of "
285 << printReg(UseReg) << " in " << UseI << "\n");
286 Score++;
287 }
288 }
289
290 int TotalScore = RetagScore[ST] += Score;
291 if (TotalScore > MaxScore ||
292 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
293 MaxScore = TotalScore;
294 MaxScoreST = ST;
295 }
296 }
297
298 if (MaxScoreST.FI < 0)
299 return std::nullopt;
300
301 // If FI's tag is already 0, we are done.
302 if (MaxScoreST.Tag == 0)
303 return MaxScoreST.FI;
304
305 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
306 SlotWithTag SwapST{-1, -1};
307 for (auto *I : ReTags) {
308 SlotWithTag ST{*I};
309 if (ST.Tag == 0) {
310 SwapST = ST;
311 break;
312 }
313 }
314
315 // Swap tags between the victim and the highest scoring pair.
316 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
317 // the highest score slot without changing anything else.
318 for (auto *&I : ReTags) {
319 SlotWithTag ST{*I};
320 MachineOperand &TagOp = I->getOperand(4);
321 if (ST == MaxScoreST) {
322 TagOp.setImm(0);
323 } else if (ST == SwapST) {
324 TagOp.setImm(MaxScoreST.Tag);
325 }
326 }
327 return MaxScoreST.FI;
328}
329
330bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
331 MF = &Func;
332 MRI = &MF->getRegInfo();
333 AFI = MF->getInfo<AArch64FunctionInfo>();
334 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
335 TRI = static_cast<const AArch64RegisterInfo *>(
336 MF->getSubtarget().getRegisterInfo());
337 MFI = &MF->getFrameInfo();
338 ReTags.clear();
339
340 assert(MRI->isSSA());
341
342 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
343 << "********** Function: " << MF->getName() << '\n');
344
345 SmallSetVector<int, 8> TaggedSlots;
346 for (auto &BB : *MF) {
347 for (auto &I : BB) {
348 if (I.getOpcode() == AArch64::TAGPstack) {
349 ReTags.push_back(&I);
350 int FI = I.getOperand(1).getIndex();
351 TaggedSlots.insert(FI);
352 // There should be no offsets in TAGP yet.
353 assert(I.getOperand(2).getImm() == 0);
354 }
355 }
356 }
357
358 // Take over from SSP. It does nothing for tagged slots, and should not really
359 // have been enabled in the first place.
360 for (int FI : TaggedSlots)
362
363 if (ReTags.empty())
364 return false;
365
366 if (mayUseUncheckedLoadStore())
367 uncheckLoadsAndStores();
368
369 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
370 // If the base tagged pointer is set up to the address of this slot,
371 // the ADDG instruction can be eliminated.
372 std::optional<int> BaseSlot = findFirstSlotCandidate();
373 if (BaseSlot)
374 AFI->setTaggedBasePointerIndex(*BaseSlot);
375
376 for (auto *I : ReTags) {
377 int FI = I->getOperand(1).getIndex();
378 int Tag = I->getOperand(4).getImm();
379 Register Base = I->getOperand(3).getReg();
380 if (Tag == 0 && FI == BaseSlot) {
381 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
382 I->getOperand(0).getReg())
383 .addReg(Base);
385 }
386 }
387
388 return true;
389}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI)
static cl::opt< UncheckedLdStMode > ClUncheckedLdSt("stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe), cl::desc("Unconditionally apply unchecked-ld-st optimization (even for large " "stack frames, or in the presence of variable sized allocas)."), cl::values(clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), clEnumValN(UncheckedSafe, "safe", "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")))
static cl::opt< bool > ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0)."))
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
bool getUseLocalStackAllocationBlock() const
Get whether the local allocation blob should be allocated together or let PEI allocate the locals in ...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void setImm(int64_t immVal)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64StackTaggingPreRAPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Other
Any other memory.
Definition ModRef.h:68
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:591
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B)
static unsigned getHashValue(const SlotWithTag &V)
An information struct used to provide DenseMap with the various necessary components for a given valu...