LLVM 22.0.0git
StackColoring.cpp
Go to the documentation of this file.
1//===- StackColoring.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the stack-coloring optimization that looks for
10// lifetime markers machine instructions (LIFETIME_START and LIFETIME_END),
11// which represent the possible lifetime of stack slots. It attempts to
12// merge disjoint stack slots and reduce the used stack space.
13// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14//
15// TODO: In the future we plan to improve stack coloring in the following ways:
16// 1. Allow merging multiple small slots into a single larger slot at different
17// offsets.
18// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19// spill slots.
20//
21//===----------------------------------------------------------------------===//
22
24#include "llvm/ADT/BitVector.h"
25#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/Statistic.h"
39#include "llvm/CodeGen/Passes.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Constants.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Use.h"
50#include "llvm/IR/Value.h"
52#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <limits>
61#include <memory>
62#include <utility>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "stack-coloring"
67
68static cl::opt<bool>
69DisableColoring("no-stack-coloring",
70 cl::init(false), cl::Hidden,
71 cl::desc("Disable stack coloring"));
72
73/// The user may write code that uses allocas outside of the declared lifetime
74/// zone. This can happen when the user returns a reference to a local
75/// data-structure. We can detect these cases and decide not to optimize the
76/// code. If this flag is enabled, we try to save the user. This option
77/// is treated as overriding LifetimeStartOnFirstUse below.
78static cl::opt<bool>
79ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80 cl::init(false), cl::Hidden,
81 cl::desc("Do not optimize lifetime zones that "
82 "are broken"));
83
84/// Enable enhanced dataflow scheme for lifetime analysis (treat first
85/// use of stack slot as start of slot lifetime, as opposed to looking
86/// for LIFETIME_START marker). See "Implementation notes" below for
87/// more info.
88static cl::opt<bool>
89LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90 cl::init(true), cl::Hidden,
91 cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92
93
94STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98
99//===----------------------------------------------------------------------===//
100// StackColoring Pass
101//===----------------------------------------------------------------------===//
102//
103// Stack Coloring reduces stack usage by merging stack slots when they
104// can't be used together. For example, consider the following C program:
105//
106// void bar(char *, int);
107// void foo(bool var) {
108// A: {
109// char z[4096];
110// bar(z, 0);
111// }
112//
113// char *p;
114// char x[4096];
115// char y[4096];
116// if (var) {
117// p = x;
118// } else {
119// bar(y, 1);
120// p = y + 1024;
121// }
122// B:
123// bar(p, 2);
124// }
125//
126// Naively-compiled, this program would use 12k of stack space. However, the
127// stack slot corresponding to `z` is always destroyed before either of the
128// stack slots for `x` or `y` are used, and then `x` is only used if `var`
129// is true, while `y` is only used if `var` is false. So in no time are 2
130// of the stack slots used together, and therefore we can merge them,
131// compiling the function using only a single 4k alloca:
132//
133// void foo(bool var) { // equivalent
134// char x[4096];
135// char *p;
136// bar(x, 0);
137// if (var) {
138// p = x;
139// } else {
140// bar(x, 1);
141// p = x + 1024;
142// }
143// bar(p, 2);
144// }
145//
146// This is an important optimization if we want stack space to be under
147// control in large functions, both open-coded ones and ones created by
148// inlining.
149//
150// Implementation Notes:
151// ---------------------
152//
153// An important part of the above reasoning is that `z` can't be accessed
154// while the latter 2 calls to `bar` are running. This is justified because
155// `z`'s lifetime is over after we exit from block `A:`, so any further
156// accesses to it would be UB. The way we represent this information
157// in LLVM is by having frontends delimit blocks with `lifetime.start`
158// and `lifetime.end` intrinsics.
159//
160// The effect of these intrinsics seems to be as follows (maybe I should
161// specify this in the reference?):
162//
163// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164// lifetime intrinsic refers to that stack slot, in which case
165// it is marked as *in-scope*.
166// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167// the stack slot is overwritten with `undef`.
168// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169// L4) on function exit, all stack slots are marked as *out-of-scope*.
170// L5) `lifetime.end` is a no-op when called on a slot that is already
171// *out-of-scope*.
172// L6) memory accesses to *out-of-scope* stack slots are UB.
173// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174// are invalidated, unless the slot is "degenerate". This is used to
175// justify not marking slots as in-use until the pointer to them is
176// used, but feels a bit hacky in the presence of things like LICM. See
177// the "Degenerate Slots" section for more details.
178//
179// Now, let's ground stack coloring on these rules. We'll define a slot
180// as *in-use* at a (dynamic) point in execution if it either can be
181// written to at that point, or if it has a live and non-undef content
182// at that point.
183//
184// Obviously, slots that are never *in-use* together can be merged, and
185// in our example `foo`, the slots for `x`, `y` and `z` are never
186// in-use together (of course, sometimes slots that *are* in-use together
187// might still be mergable, but we don't care about that here).
188//
189// In this implementation, we successively merge pairs of slots that are
190// not *in-use* together. We could be smarter - for example, we could merge
191// a single large slot with 2 small slots, or we could construct the
192// interference graph and run a "smart" graph coloring algorithm, but with
193// that aside, how do we find out whether a pair of slots might be *in-use*
194// together?
195//
196// From our rules, we see that *out-of-scope* slots are never *in-use*,
197// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198// until their address is taken. Therefore, we can approximate slot activity
199// using dataflow.
200//
201// A subtle point: naively, we might try to figure out which pairs of
202// stack-slots interfere by propagating `S in-use` through the CFG for every
203// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204// which they are both *in-use*.
205//
206// That is sound, but overly conservative in some cases: in our (artificial)
207// example `foo`, either `x` or `y` might be in use at the label `B:`, but
208// as `x` is only in use if we came in from the `var` edge and `y` only
209// if we came from the `!var` edge, they still can't be in use together.
210// See PR32488 for an important real-life case.
211//
212// If we wanted to find all points of interference precisely, we could
213// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214// would be precise, but requires propagating `O(n^2)` dataflow facts.
215//
216// However, we aren't interested in the *set* of points of interference
217// between 2 stack slots, only *whether* there *is* such a point. So we
218// can rely on a little trick: for `S` and `T` to be in-use together,
219// one of them needs to become in-use while the other is in-use (or
220// they might both become in use simultaneously). We can check this
221// by also keeping track of the points at which a stack slot might *start*
222// being in-use.
223//
224// Exact first use:
225// ----------------
226//
227// Consider the following motivating example:
228//
229// int foo() {
230// char b1[1024], b2[1024];
231// if (...) {
232// char b3[1024];
233// <uses of b1, b3>;
234// return x;
235// } else {
236// char b4[1024], b5[1024];
237// <uses of b2, b4, b5>;
238// return y;
239// }
240// }
241//
242// In the code above, "b3" and "b4" are declared in distinct lexical
243// scopes, meaning that it is easy to prove that they can share the
244// same stack slot. Variables "b1" and "b2" are declared in the same
245// scope, meaning that from a lexical point of view, their lifetimes
246// overlap. From a control flow pointer of view, however, the two
247// variables are accessed in disjoint regions of the CFG, thus it
248// should be possible for them to share the same stack slot. An ideal
249// stack allocation for the function above would look like:
250//
251// slot 0: b1, b2
252// slot 1: b3, b4
253// slot 2: b5
254//
255// Achieving this allocation is tricky, however, due to the way
256// lifetime markers are inserted. Here is a simplified view of the
257// control flow graph for the code above:
258//
259// +------ block 0 -------+
260// 0| LIFETIME_START b1, b2 |
261// 1| <test 'if' condition> |
262// +-----------------------+
263// ./ \.
264// +------ block 1 -------+ +------ block 2 -------+
265// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268// +-----------------------+ +-----------------------+
269// \. /.
270// +------ block 3 -------+
271// 8| <cleanupcode> |
272// 9| LIFETIME_END b1, b2 |
273// 10| return |
274// +-----------------------+
275//
276// If we create live intervals for the variables above strictly based
277// on the lifetime markers, we'll get the set of intervals on the
278// left. If we ignore the lifetime start markers and instead treat a
279// variable's lifetime as beginning with the first reference to the
280// var, then we get the intervals on the right.
281//
282// LIFETIME_START First Use
283// b1: [0,9] [3,4] [8,9]
284// b2: [0,9] [6,9]
285// b3: [2,4] [3,4]
286// b4: [5,7] [6,7]
287// b5: [5,7] [6,7]
288//
289// For the intervals on the left, the best we can do is overlap two
290// variables (b3 and b4, for example); this gives us a stack size of
291// 4*1024 bytes, not ideal. When treating first-use as the start of a
292// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293// byte stack (better).
294//
295// Degenerate Slots:
296// -----------------
297//
298// Relying entirely on first-use of stack slots is problematic,
299// however, due to the fact that optimizations can sometimes migrate
300// uses of a variable outside of its lifetime start/end region. Here
301// is an example:
302//
303// int bar() {
304// char b1[1024], b2[1024];
305// if (...) {
306// <uses of b2>
307// return y;
308// } else {
309// <uses of b1>
310// while (...) {
311// char b3[1024];
312// <uses of b3>
313// }
314// }
315// }
316//
317// Before optimization, the control flow graph for the code above
318// might look like the following:
319//
320// +------ block 0 -------+
321// 0| LIFETIME_START b1, b2 |
322// 1| <test 'if' condition> |
323// +-----------------------+
324// ./ \.
325// +------ block 1 -------+ +------- block 2 -------+
326// 2| <uses of b2> | 3| <uses of b1> |
327// +-----------------------+ +-----------------------+
328// | |
329// | +------- block 3 -------+ <-\.
330// | 4| <while condition> | |
331// | +-----------------------+ |
332// | / | |
333// | / +------- block 4 -------+
334// \ / 5| LIFETIME_START b3 | |
335// \ / 6| <uses of b3> | |
336// \ / 7| LIFETIME_END b3 | |
337// \ | +------------------------+ |
338// \ | \ /
339// +------ block 5 -----+ \---------------
340// 8| <cleanupcode> |
341// 9| LIFETIME_END b1, b2 |
342// 10| return |
343// +---------------------+
344//
345// During optimization, however, it can happen that an instruction
346// computing an address in "b3" (for example, a loop-invariant GEP) is
347// hoisted up out of the loop from block 4 to block 2. [Note that
348// this is not an actual load from the stack, only an instruction that
349// computes the address to be loaded]. If this happens, there is now a
350// path leading from the first use of b3 to the return instruction
351// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352// now larger than if we were computing live intervals strictly based
353// on lifetime markers. In the example above, this lengthened lifetime
354// would mean that it would appear illegal to overlap b3 with b2.
355//
356// To deal with this such cases, the code in ::collectMarkers() below
357// tries to identify "degenerate" slots -- those slots where on a single
358// forward pass through the CFG we encounter a first reference to slot
359// K before we hit the slot K lifetime start marker. For such slots,
360// we fall back on using the lifetime start marker as the beginning of
361// the variable's lifetime. NB: with this implementation, slots can
362// appear degenerate in cases where there is unstructured control flow:
363//
364// if (q) goto mid;
365// if (x > 9) {
366// int b[100];
367// memcpy(&b[0], ...);
368// mid: b[k] = ...;
369// abc(&b);
370// }
371//
372// If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373// before visiting the memcpy block (which will contain the lifetime start
374// for "b" then it will appear that 'b' has a degenerate lifetime.
375
376namespace {
377
378/// StackColoring - A machine pass for merging disjoint stack allocations,
379/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
380class StackColoring {
381 MachineFrameInfo *MFI = nullptr;
382 MachineFunction *MF = nullptr;
383
384 /// A class representing liveness information for a single basic block.
385 /// Each bit in the BitVector represents the liveness property
386 /// for a different stack slot.
387 struct BlockLifetimeInfo {
388 /// Which slots BEGINs in each basic block.
389 BitVector Begin;
390
391 /// Which slots ENDs in each basic block.
392 BitVector End;
393
394 /// Which slots are marked as LIVE_IN, coming into each basic block.
395 BitVector LiveIn;
396
397 /// Which slots are marked as LIVE_OUT, coming out of each basic block.
398 BitVector LiveOut;
399 };
400
401 /// Maps active slots (per bit) for each basic block.
402 using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
403 LivenessMap BlockLiveness;
404
405 /// Maps serial numbers to basic blocks.
406 DenseMap<const MachineBasicBlock *, int> BasicBlocks;
407
408 /// Maps basic blocks to a serial number.
410
411 /// Maps slots to their use interval. Outside of this interval, slots
412 /// values are either dead or `undef` and they will not be written to.
414
415 /// Maps slots to the points where they can become in-use.
417
418 /// VNInfo is used for the construction of LiveIntervals.
419 VNInfo::Allocator VNInfoAllocator;
420
421 /// SlotIndex analysis object.
422 SlotIndexes *Indexes = nullptr;
423
424 /// The list of lifetime markers found. These markers are to be removed
425 /// once the coloring is done.
426 SmallVector<MachineInstr*, 8> Markers;
427
428 /// Record the FI slots for which we have seen some sort of
429 /// lifetime marker (either start or end).
430 BitVector InterestingSlots;
431
432 /// FI slots that need to be handled conservatively (for these
433 /// slots lifetime-start-on-first-use is disabled).
434 BitVector ConservativeSlots;
435
436 /// Number of iterations taken during data flow analysis.
437 unsigned NumIterations;
438
439public:
440 StackColoring(SlotIndexes *Indexes) : Indexes(Indexes) {}
441 bool run(MachineFunction &Func);
442
443private:
444 /// Used in collectMarkers
445 using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
446
447 /// Debug.
448 void dump() const;
449 void dumpIntervals() const;
450 void dumpBB(MachineBasicBlock *MBB) const;
451 void dumpBV(const char *tag, const BitVector &BV) const;
452
453 /// Removes all of the lifetime marker instructions from the function.
454 /// \returns true if any markers were removed.
455 bool removeAllMarkers();
456
457 /// Scan the machine function and find all of the lifetime markers.
458 /// Record the findings in the BEGIN and END vectors.
459 /// \returns the number of markers found.
460 unsigned collectMarkers(unsigned NumSlot);
461
462 /// Perform the dataflow calculation and calculate the lifetime for each of
463 /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
464 /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
465 /// in and out blocks.
466 void calculateLocalLiveness();
467
468 /// Returns TRUE if we're using the first-use-begins-lifetime method for
469 /// this slot (if FALSE, then the start marker is treated as start of lifetime).
470 bool applyFirstUse(int Slot) {
472 return false;
473 if (ConservativeSlots.test(Slot))
474 return false;
475 return true;
476 }
477
478 /// Examines the specified instruction and returns TRUE if the instruction
479 /// represents the start or end of an interesting lifetime. The slot or slots
480 /// starting or ending are added to the vector "slots" and "isStart" is set
481 /// accordingly.
482 /// \returns True if inst contains a lifetime start or end
483 bool isLifetimeStartOrEnd(const MachineInstr &MI,
484 SmallVector<int, 4> &slots,
485 bool &isStart);
486
487 /// Construct the LiveIntervals for the slots.
488 void calculateLiveIntervals(unsigned NumSlots);
489
490 /// Go over the machine function and change instructions which use stack
491 /// slots to use the joint slots.
492 void remapInstructions(DenseMap<int, int> &SlotRemap);
493
494 /// The input program may contain instructions which are not inside lifetime
495 /// markers. This can happen due to a bug in the compiler or due to a bug in
496 /// user code (for example, returning a reference to a local variable).
497 /// This procedure checks all of the instructions in the function and
498 /// invalidates lifetime ranges which do not contain all of the instructions
499 /// which access that frame slot.
500 void removeInvalidSlotRanges();
501
502 /// Map entries which point to other entries to their destination.
503 /// A->B->C becomes A->C.
504 void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
505};
506
507class StackColoringLegacy : public MachineFunctionPass {
508public:
509 static char ID;
510
511 StackColoringLegacy() : MachineFunctionPass(ID) {}
512
513 void getAnalysisUsage(AnalysisUsage &AU) const override;
514 bool runOnMachineFunction(MachineFunction &Func) override;
515};
516
517} // end anonymous namespace
518
519char StackColoringLegacy::ID = 0;
520
521char &llvm::StackColoringLegacyID = StackColoringLegacy::ID;
522
524 "Merge disjoint stack slots", false, false)
526INITIALIZE_PASS_END(StackColoringLegacy, DEBUG_TYPE,
527 "Merge disjoint stack slots", false, false)
528
529void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
530 AU.addRequired<SlotIndexesWrapperPass>();
532}
533
534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
535LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
536 const BitVector &BV) const {
537 dbgs() << tag << " : { ";
538 for (unsigned I = 0, E = BV.size(); I != E; ++I)
539 dbgs() << BV.test(I) << " ";
540 dbgs() << "}\n";
541}
542
543LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
544 LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
545 assert(BI != BlockLiveness.end() && "Block not found");
546 const BlockLifetimeInfo &BlockInfo = BI->second;
547
548 dumpBV("BEGIN", BlockInfo.Begin);
549 dumpBV("END", BlockInfo.End);
550 dumpBV("LIVE_IN", BlockInfo.LiveIn);
551 dumpBV("LIVE_OUT", BlockInfo.LiveOut);
552}
553
554LLVM_DUMP_METHOD void StackColoring::dump() const {
555 for (MachineBasicBlock *MBB : depth_first(MF)) {
556 dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
557 << MBB->getName() << "]\n";
558 dumpBB(MBB);
559 }
560}
561
562LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
563 for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
564 dbgs() << "Interval[" << I << "]:\n";
565 Intervals[I]->dump();
566 }
567}
568#endif
569
570static inline int getStartOrEndSlot(const MachineInstr &MI)
571{
572 assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
573 MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
574 "Expected LIFETIME_START or LIFETIME_END op");
575 const MachineOperand &MO = MI.getOperand(0);
576 int Slot = MO.getIndex();
577 if (Slot >= 0)
578 return Slot;
579 return -1;
580}
581
582// At the moment the only way to end a variable lifetime is with
583// a VARIABLE_LIFETIME op (which can't contain a start). If things
584// change and the IR allows for a single inst that both begins
585// and ends lifetime(s), this interface will need to be reworked.
586bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
587 SmallVector<int, 4> &slots,
588 bool &isStart) {
589 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
590 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
592 if (Slot < 0)
593 return false;
594 if (!InterestingSlots.test(Slot))
595 return false;
596 slots.push_back(Slot);
597 if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
598 isStart = false;
599 return true;
600 }
601 if (!applyFirstUse(Slot)) {
602 isStart = true;
603 return true;
604 }
606 if (!MI.isDebugInstr()) {
607 bool found = false;
608 for (const MachineOperand &MO : MI.operands()) {
609 if (!MO.isFI())
610 continue;
611 int Slot = MO.getIndex();
612 if (Slot<0)
613 continue;
614 if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
615 slots.push_back(Slot);
616 found = true;
617 }
618 }
619 if (found) {
620 isStart = true;
621 return true;
622 }
623 }
624 }
625 return false;
626}
627
628unsigned StackColoring::collectMarkers(unsigned NumSlot) {
629 unsigned MarkersFound = 0;
630 BlockBitVecMap SeenStartMap;
631 InterestingSlots.clear();
632 InterestingSlots.resize(NumSlot);
633 ConservativeSlots.clear();
634 ConservativeSlots.resize(NumSlot);
635
636 // number of start and end lifetime ops for each slot
637 SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
638 SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
639
640 // Step 1: collect markers and populate the "InterestingSlots"
641 // and "ConservativeSlots" sets.
642 for (MachineBasicBlock *MBB : depth_first(MF)) {
643 // Compute the set of slots for which we've seen a START marker but have
644 // not yet seen an END marker at this point in the walk (e.g. on entry
645 // to this bb).
646 BitVector BetweenStartEnd;
647 BetweenStartEnd.resize(NumSlot);
648 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
649 BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
650 if (I != SeenStartMap.end()) {
651 BetweenStartEnd |= I->second;
652 }
653 }
654
655 // Walk the instructions in the block to look for start/end ops.
656 for (MachineInstr &MI : *MBB) {
657 if (MI.isDebugInstr())
658 continue;
659 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
660 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
662 if (Slot < 0)
663 continue;
664 InterestingSlots.set(Slot);
665 if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
666 BetweenStartEnd.set(Slot);
667 NumStartLifetimes[Slot] += 1;
668 } else {
669 BetweenStartEnd.reset(Slot);
670 NumEndLifetimes[Slot] += 1;
671 }
672 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
673 if (Allocation) {
674 LLVM_DEBUG(dbgs() << "Found a lifetime ");
675 LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
676 ? "start"
677 : "end"));
678 LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
680 << " with allocation: " << Allocation->getName() << "\n");
681 }
682 Markers.push_back(&MI);
683 MarkersFound += 1;
684 } else {
685 for (const MachineOperand &MO : MI.operands()) {
686 if (!MO.isFI())
687 continue;
688 int Slot = MO.getIndex();
689 if (Slot < 0)
690 continue;
691 if (! BetweenStartEnd.test(Slot)) {
692 ConservativeSlots.set(Slot);
693 }
694 }
695 }
696 }
697 BitVector &SeenStart = SeenStartMap[MBB];
698 SeenStart |= BetweenStartEnd;
699 }
700 if (!MarkersFound) {
701 return 0;
702 }
703
704 // PR27903: slots with multiple start or end lifetime ops are not
705 // safe to enable for "lifetime-start-on-first-use".
706 for (unsigned slot = 0; slot < NumSlot; ++slot) {
707 if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
708 ConservativeSlots.set(slot);
709 }
710
711 // The write to the catch object by the personality function is not propely
712 // modeled in IR: It happens before any cleanuppads are executed, even if the
713 // first mention of the catch object is in a catchpad. As such, mark catch
714 // object slots as conservative, so they are excluded from first-use analysis.
715 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
716 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
717 for (WinEHHandlerType &H : TBME.HandlerArray)
718 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
719 H.CatchObj.FrameIndex >= 0)
720 ConservativeSlots.set(H.CatchObj.FrameIndex);
721
722 LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
723
724 // Step 2: compute begin/end sets for each block
725
726 // NOTE: We use a depth-first iteration to ensure that we obtain a
727 // deterministic numbering.
728 for (MachineBasicBlock *MBB : depth_first(MF)) {
729 // Assign a serial number to this basic block.
730 BasicBlocks[MBB] = BasicBlockNumbering.size();
731 BasicBlockNumbering.push_back(MBB);
732
733 // Keep a reference to avoid repeated lookups.
734 BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
735
736 BlockInfo.Begin.resize(NumSlot);
737 BlockInfo.End.resize(NumSlot);
738
739 SmallVector<int, 4> slots;
740 for (MachineInstr &MI : *MBB) {
741 bool isStart = false;
742 slots.clear();
743 if (isLifetimeStartOrEnd(MI, slots, isStart)) {
744 if (!isStart) {
745 assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
746 int Slot = slots[0];
747 if (BlockInfo.Begin.test(Slot)) {
748 BlockInfo.Begin.reset(Slot);
749 }
750 BlockInfo.End.set(Slot);
751 } else {
752 for (auto Slot : slots) {
753 LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
755 << " at " << printMBBReference(*MBB) << " index ");
757 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
758 if (Allocation) {
760 << " with allocation: " << Allocation->getName());
761 }
762 LLVM_DEBUG(dbgs() << "\n");
763 if (BlockInfo.End.test(Slot)) {
764 BlockInfo.End.reset(Slot);
765 }
766 BlockInfo.Begin.set(Slot);
767 }
768 }
769 }
770 }
771 }
772
773 // Update statistics.
774 NumMarkerSeen += MarkersFound;
775 return MarkersFound;
776}
777
778void StackColoring::calculateLocalLiveness() {
779 unsigned NumIters = 0;
780 bool changed = true;
781 // Create BitVector outside the loop and reuse them to avoid repeated heap
782 // allocations.
783 BitVector LocalLiveIn;
784 BitVector LocalLiveOut;
785 while (changed) {
786 changed = false;
787 ++NumIters;
788
789 for (const MachineBasicBlock *BB : BasicBlockNumbering) {
790 // Use an iterator to avoid repeated lookups.
791 LivenessMap::iterator BI = BlockLiveness.find(BB);
792 assert(BI != BlockLiveness.end() && "Block not found");
793 BlockLifetimeInfo &BlockInfo = BI->second;
794
795 // Compute LiveIn by unioning together the LiveOut sets of all preds.
796 LocalLiveIn.clear();
797 for (MachineBasicBlock *Pred : BB->predecessors()) {
798 LivenessMap::const_iterator I = BlockLiveness.find(Pred);
799 // PR37130: transformations prior to stack coloring can
800 // sometimes leave behind statically unreachable blocks; these
801 // can be safely skipped here.
802 if (I != BlockLiveness.end())
803 LocalLiveIn |= I->second.LiveOut;
804 }
805
806 // Compute LiveOut by subtracting out lifetimes that end in this
807 // block, then adding in lifetimes that begin in this block. If
808 // we have both BEGIN and END markers in the same basic block
809 // then we know that the BEGIN marker comes after the END,
810 // because we already handle the case where the BEGIN comes
811 // before the END when collecting the markers (and building the
812 // BEGIN/END vectors).
813 LocalLiveOut = LocalLiveIn;
814 LocalLiveOut.reset(BlockInfo.End);
815 LocalLiveOut |= BlockInfo.Begin;
816
817 // Update block LiveIn set, noting whether it has changed.
818 if (LocalLiveIn.test(BlockInfo.LiveIn)) {
819 changed = true;
820 BlockInfo.LiveIn |= LocalLiveIn;
821 }
822
823 // Update block LiveOut set, noting whether it has changed.
824 if (LocalLiveOut.test(BlockInfo.LiveOut)) {
825 changed = true;
826 BlockInfo.LiveOut |= LocalLiveOut;
827 }
828 }
829 } // while changed.
830
831 NumIterations = NumIters;
832}
833
834void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
836 SmallVector<bool, 16> DefinitelyInUse;
837
838 // For each block, find which slots are active within this block
839 // and update the live intervals.
840 for (const MachineBasicBlock &MBB : *MF) {
841 Starts.clear();
842 Starts.resize(NumSlots);
843 DefinitelyInUse.clear();
844 DefinitelyInUse.resize(NumSlots);
845
846 // Start the interval of the slots that we previously found to be 'in-use'.
847 BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
848 for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
849 pos = MBBLiveness.LiveIn.find_next(pos)) {
850 Starts[pos] = Indexes->getMBBStartIdx(&MBB);
851 }
852
853 // Create the interval for the basic blocks containing lifetime begin/end.
854 for (const MachineInstr &MI : MBB) {
855 SmallVector<int, 4> slots;
856 bool IsStart = false;
857 if (!isLifetimeStartOrEnd(MI, slots, IsStart))
858 continue;
859 SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
860 for (auto Slot : slots) {
861 if (IsStart) {
862 // If a slot is already definitely in use, we don't have to emit
863 // a new start marker because there is already a pre-existing
864 // one.
865 if (!DefinitelyInUse[Slot]) {
866 LiveStarts[Slot].push_back(ThisIndex);
867 DefinitelyInUse[Slot] = true;
868 }
869 if (!Starts[Slot].isValid())
870 Starts[Slot] = ThisIndex;
871 } else {
872 if (Starts[Slot].isValid()) {
873 VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
874 Intervals[Slot]->addSegment(
875 LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
876 Starts[Slot] = SlotIndex(); // Invalidate the start index
877 DefinitelyInUse[Slot] = false;
878 }
879 }
880 }
881 }
882
883 // Finish up started segments
884 for (unsigned i = 0; i < NumSlots; ++i) {
885 if (!Starts[i].isValid())
886 continue;
887
888 SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
889 VNInfo *VNI = Intervals[i]->getValNumInfo(0);
890 Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
891 }
892 }
893}
894
895bool StackColoring::removeAllMarkers() {
896 unsigned Count = 0;
897 for (MachineInstr *MI : Markers) {
898 MI->eraseFromParent();
899 Count++;
900 }
901 Markers.clear();
902
903 LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
904 return Count;
905}
906
907void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
908 unsigned FixedInstr = 0;
909 unsigned FixedMemOp = 0;
910 unsigned FixedDbg = 0;
911
912 // Remap debug information that refers to stack slots.
913 for (auto &VI : MF->getVariableDbgInfo()) {
914 if (!VI.Var || !VI.inStackSlot())
915 continue;
916 int Slot = VI.getStackSlot();
917 if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
918 LLVM_DEBUG(dbgs() << "Remapping debug info for ["
919 << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
920 VI.updateStackSlot(It->second);
921 FixedDbg++;
922 }
923 }
924
925 // Keep a list of *allocas* which need to be remapped.
926 DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
927
928 // Keep a list of allocas which has been affected by the remap.
929 SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
930
931 for (const std::pair<int, int> &SI : SlotRemap) {
932 const AllocaInst *From = MFI->getObjectAllocation(SI.first);
933 const AllocaInst *To = MFI->getObjectAllocation(SI.second);
934 assert(To && From && "Invalid allocation object");
935 Allocas[From] = To;
936
937 // If From is before wo, its possible that there is a use of From between
938 // them.
939 if (From->comesBefore(To))
940 const_cast<AllocaInst *>(To)->moveBefore(
941 const_cast<AllocaInst *>(From)->getIterator());
942
943 // AA might be used later for instruction scheduling, and we need it to be
944 // able to deduce the correct aliasing releationships between pointers
945 // derived from the alloca being remapped and the target of that remapping.
946 // The only safe way, without directly informing AA about the remapping
947 // somehow, is to directly update the IR to reflect the change being made
948 // here.
949 Instruction *Inst = const_cast<AllocaInst *>(To);
950 if (From->getType() != To->getType()) {
951 BitCastInst *Cast = new BitCastInst(Inst, From->getType());
952 Cast->insertAfter(Inst->getIterator());
953 Inst = Cast;
954 }
955
956 // We keep both slots to maintain AliasAnalysis metadata later.
957 MergedAllocas.insert(From);
958 MergedAllocas.insert(To);
959
960 // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
961 // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
962 // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
964 = MFI->getObjectSSPLayout(SI.first);
966 if (FromKind != MachineFrameInfo::SSPLK_None &&
967 (ToKind == MachineFrameInfo::SSPLK_None ||
969 FromKind != MachineFrameInfo::SSPLK_AddrOf)))
970 MFI->setObjectSSPLayout(SI.second, FromKind);
971
972 // The new alloca might not be valid in a llvm.dbg.declare for this
973 // variable, so poison out the use to make the verifier happy.
974 AllocaInst *FromAI = const_cast<AllocaInst *>(From);
975 if (FromAI->isUsedByMetadata())
977 for (auto &Use : FromAI->uses()) {
978 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
979 if (BCI->isUsedByMetadata())
980 ValueAsMetadata::handleRAUW(BCI, PoisonValue::get(BCI->getType()));
981 }
982
983 // Note that this will not replace uses in MMOs (which we'll update below),
984 // or anywhere else (which is why we won't delete the original
985 // instruction).
986 FromAI->replaceAllUsesWith(Inst);
987 }
988
989 // Remap all instructions to the new stack slots.
990 std::vector<std::vector<MachineMemOperand *>> SSRefs(
991 MFI->getObjectIndexEnd());
992 for (MachineBasicBlock &BB : *MF)
993 for (MachineInstr &I : BB) {
994 // Skip lifetime markers. We'll remove them soon.
995 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
996 I.getOpcode() == TargetOpcode::LIFETIME_END)
997 continue;
998
999 // Update the MachineMemOperand to use the new alloca.
1000 for (MachineMemOperand *MMO : I.memoperands()) {
1001 // We've replaced IR-level uses of the remapped allocas, so we only
1002 // need to replace direct uses here.
1003 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
1004 if (!AI)
1005 continue;
1006
1007 auto It = Allocas.find(AI);
1008 if (It == Allocas.end())
1009 continue;
1010
1011 MMO->setValue(It->second);
1012 FixedMemOp++;
1013 }
1014
1015 // Update all of the machine instruction operands.
1016 for (MachineOperand &MO : I.operands()) {
1017 if (!MO.isFI())
1018 continue;
1019 int FromSlot = MO.getIndex();
1020
1021 // Don't touch arguments.
1022 if (FromSlot<0)
1023 continue;
1024
1025 // Only look at mapped slots.
1026 if (!SlotRemap.count(FromSlot))
1027 continue;
1028
1029 // In a debug build, check that the instruction that we are modifying is
1030 // inside the expected live range. If the instruction is not inside
1031 // the calculated range then it means that the alloca usage moved
1032 // outside of the lifetime markers, or that the user has a bug.
1033 // NOTE: Alloca address calculations which happen outside the lifetime
1034 // zone are okay, despite the fact that we don't have a good way
1035 // for validating all of the usages of the calculation.
1036#ifndef NDEBUG
1037 bool TouchesMemory = I.mayLoadOrStore();
1038 // If we *don't* protect the user from escaped allocas, don't bother
1039 // validating the instructions.
1040 if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1041 SlotIndex Index = Indexes->getInstructionIndex(I);
1042 const LiveInterval *Interval = &*Intervals[FromSlot];
1043 assert(Interval->find(Index) != Interval->end() &&
1044 "Found instruction usage outside of live range.");
1045 }
1046#endif
1047
1048 // Fix the machine instructions.
1049 int ToSlot = SlotRemap[FromSlot];
1050 MO.setIndex(ToSlot);
1051 FixedInstr++;
1052 }
1053
1054 // We adjust AliasAnalysis information for merged stack slots.
1056 bool ReplaceMemOps = false;
1057 for (MachineMemOperand *MMO : I.memoperands()) {
1058 // Collect MachineMemOperands which reference
1059 // FixedStackPseudoSourceValues with old frame indices.
1061 MMO->getPseudoValue())) {
1062 int FI = FSV->getFrameIndex();
1063 auto To = SlotRemap.find(FI);
1064 if (To != SlotRemap.end())
1065 SSRefs[FI].push_back(MMO);
1066 }
1067
1068 // If this memory location can be a slot remapped here,
1069 // we remove AA information.
1070 bool MayHaveConflictingAAMD = false;
1071 if (MMO->getAAInfo()) {
1072 if (const Value *MMOV = MMO->getValue()) {
1075
1076 if (Objs.empty())
1077 MayHaveConflictingAAMD = true;
1078 else
1079 for (Value *V : Objs) {
1080 // If this memory location comes from a known stack slot
1081 // that is not remapped, we continue checking.
1082 // Otherwise, we need to invalidate AA infomation.
1083 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1084 if (AI && MergedAllocas.count(AI)) {
1085 MayHaveConflictingAAMD = true;
1086 break;
1087 }
1088 }
1089 }
1090 }
1091 if (MayHaveConflictingAAMD) {
1092 NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1093 ReplaceMemOps = true;
1094 } else {
1095 NewMMOs.push_back(MMO);
1096 }
1097 }
1098
1099 // If any memory operand is updated, set memory references of
1100 // this instruction.
1101 if (ReplaceMemOps)
1102 I.setMemRefs(*MF, NewMMOs);
1103 }
1104
1105 // Rewrite MachineMemOperands that reference old frame indices.
1106 for (auto E : enumerate(SSRefs))
1107 if (!E.value().empty()) {
1108 const PseudoSourceValue *NewSV =
1109 MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
1110 for (MachineMemOperand *Ref : E.value())
1111 Ref->setValue(NewSV);
1112 }
1113
1114 // Update the location of C++ catch objects for the MSVC personality routine.
1115 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1116 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1117 for (WinEHHandlerType &H : TBME.HandlerArray)
1118 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max())
1119 if (auto It = SlotRemap.find(H.CatchObj.FrameIndex);
1120 It != SlotRemap.end())
1121 H.CatchObj.FrameIndex = It->second;
1122
1123 LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1124 LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1125 LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1126 (void) FixedMemOp;
1127 (void) FixedDbg;
1128 (void) FixedInstr;
1129}
1130
1131void StackColoring::removeInvalidSlotRanges() {
1132 for (MachineBasicBlock &BB : *MF)
1133 for (MachineInstr &I : BB) {
1134 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1135 I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1136 continue;
1137
1138 // Some intervals are suspicious! In some cases we find address
1139 // calculations outside of the lifetime zone, but not actual memory
1140 // read or write. Memory accesses outside of the lifetime zone are a clear
1141 // violation, but address calculations are okay. This can happen when
1142 // GEPs are hoisted outside of the lifetime zone.
1143 // So, in here we only check instructions which can read or write memory.
1144 if (!I.mayLoad() && !I.mayStore())
1145 continue;
1146
1147 // Check all of the machine operands.
1148 for (const MachineOperand &MO : I.operands()) {
1149 if (!MO.isFI())
1150 continue;
1151
1152 int Slot = MO.getIndex();
1153
1154 if (Slot<0)
1155 continue;
1156
1157 if (Intervals[Slot]->empty())
1158 continue;
1159
1160 // Check that the used slot is inside the calculated lifetime range.
1161 // If it is not, warn about it and invalidate the range.
1162 LiveInterval *Interval = &*Intervals[Slot];
1163 SlotIndex Index = Indexes->getInstructionIndex(I);
1164 if (Interval->find(Index) == Interval->end()) {
1165 Interval->clear();
1166 LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1167 EscapedAllocas++;
1168 }
1169 }
1170 }
1171}
1172
1173void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1174 unsigned NumSlots) {
1175 // Expunge slot remap map.
1176 for (unsigned i=0; i < NumSlots; ++i) {
1177 // If we are remapping i
1178 if (auto It = SlotRemap.find(i); It != SlotRemap.end()) {
1179 int Target = It->second;
1180 // As long as our target is mapped to something else, follow it.
1181 while (true) {
1182 auto It = SlotRemap.find(Target);
1183 if (It == SlotRemap.end())
1184 break;
1185 Target = It->second;
1186 SlotRemap[i] = Target;
1187 }
1188 }
1189 }
1190}
1191
1192bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
1193 if (skipFunction(MF.getFunction()))
1194 return false;
1195
1196 StackColoring SC(&getAnalysis<SlotIndexesWrapperPass>().getSI());
1197 return SC.run(MF);
1198}
1199
1202 StackColoring SC(&MFAM.getResult<SlotIndexesAnalysis>(MF));
1203 if (SC.run(MF))
1205 return PreservedAnalyses::all();
1206}
1207
1208bool StackColoring::run(MachineFunction &Func) {
1209 LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1210 << "********** Function: " << Func.getName() << '\n');
1211 MF = &Func;
1212 MFI = &MF->getFrameInfo();
1213 BlockLiveness.clear();
1214 BasicBlocks.clear();
1215 BasicBlockNumbering.clear();
1216 Markers.clear();
1217 Intervals.clear();
1218 LiveStarts.clear();
1219 VNInfoAllocator.Reset();
1220
1221 unsigned NumSlots = MFI->getObjectIndexEnd();
1222
1223 // If there are no stack slots then there are no markers to remove.
1224 if (!NumSlots)
1225 return false;
1226
1227 SmallVector<int, 8> SortedSlots;
1228 SortedSlots.reserve(NumSlots);
1229 Intervals.reserve(NumSlots);
1230 LiveStarts.resize(NumSlots);
1231
1232 unsigned NumMarkers = collectMarkers(NumSlots);
1233
1234 unsigned TotalSize = 0;
1235 LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1236 << " slots\n");
1237 LLVM_DEBUG(dbgs() << "Slot structure:\n");
1238
1239 for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1240 LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1241 << " bytes.\n");
1242 TotalSize += MFI->getObjectSize(i);
1243 }
1244
1245 LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1246
1247 // Don't continue because there are not enough lifetime markers, or the
1248 // stack is too small, or we are told not to optimize the slots.
1249 if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
1250 LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1251 return removeAllMarkers();
1252 }
1253
1254 for (unsigned i=0; i < NumSlots; ++i) {
1255 std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1256 LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1257 Intervals.push_back(std::move(LI));
1258 SortedSlots.push_back(i);
1259 }
1260
1261 // Calculate the liveness of each block.
1262 calculateLocalLiveness();
1263 LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1264 LLVM_DEBUG(dump());
1265
1266 // Propagate the liveness information.
1267 calculateLiveIntervals(NumSlots);
1268 LLVM_DEBUG(dumpIntervals());
1269
1270 // Search for allocas which are used outside of the declared lifetime
1271 // markers.
1273 removeInvalidSlotRanges();
1274
1275 // Maps old slots to new slots.
1276 DenseMap<int, int> SlotRemap;
1277 unsigned RemovedSlots = 0;
1278 unsigned ReducedSize = 0;
1279
1280 // Do not bother looking at empty intervals.
1281 for (unsigned I = 0; I < NumSlots; ++I) {
1282 if (Intervals[SortedSlots[I]]->empty())
1283 SortedSlots[I] = -1;
1284 }
1285
1286 // This is a simple greedy algorithm for merging allocas. First, sort the
1287 // slots, placing the largest slots first. Next, perform an n^2 scan and look
1288 // for disjoint slots. When you find disjoint slots, merge the smaller one
1289 // into the bigger one and update the live interval. Remove the small alloca
1290 // and continue.
1291
1292 // Sort the slots according to their size. Place unused slots at the end.
1293 // Use stable sort to guarantee deterministic code generation.
1294 llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1295 // We use -1 to denote a uninteresting slot. Place these slots at the end.
1296 if (LHS == -1)
1297 return false;
1298 if (RHS == -1)
1299 return true;
1300 // Sort according to size.
1301 return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1302 });
1303
1304 for (auto &s : LiveStarts)
1305 llvm::sort(s);
1306
1307 bool Changed = true;
1308 while (Changed) {
1309 Changed = false;
1310 for (unsigned I = 0; I < NumSlots; ++I) {
1311 if (SortedSlots[I] == -1)
1312 continue;
1313
1314 for (unsigned J=I+1; J < NumSlots; ++J) {
1315 if (SortedSlots[J] == -1)
1316 continue;
1317
1318 int FirstSlot = SortedSlots[I];
1319 int SecondSlot = SortedSlots[J];
1320
1321 // Objects with different stack IDs cannot be merged.
1322 if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
1323 continue;
1324
1325 LiveInterval *First = &*Intervals[FirstSlot];
1326 LiveInterval *Second = &*Intervals[SecondSlot];
1327 auto &FirstS = LiveStarts[FirstSlot];
1328 auto &SecondS = LiveStarts[SecondSlot];
1329 assert(!First->empty() && !Second->empty() && "Found an empty range");
1330
1331 // Merge disjoint slots. This is a little bit tricky - see the
1332 // Implementation Notes section for an explanation.
1333 if (!First->isLiveAtIndexes(SecondS) &&
1334 !Second->isLiveAtIndexes(FirstS)) {
1335 Changed = true;
1336 First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1337
1338 int OldSize = FirstS.size();
1339 FirstS.append(SecondS.begin(), SecondS.end());
1340 auto Mid = FirstS.begin() + OldSize;
1341 std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1342
1343 SlotRemap[SecondSlot] = FirstSlot;
1344 SortedSlots[J] = -1;
1345 LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1346 << SecondSlot << " together.\n");
1347 Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
1348 MFI->getObjectAlign(SecondSlot));
1349
1350 assert(MFI->getObjectSize(FirstSlot) >=
1351 MFI->getObjectSize(SecondSlot) &&
1352 "Merging a small object into a larger one");
1353
1354 RemovedSlots+=1;
1355 ReducedSize += MFI->getObjectSize(SecondSlot);
1356 MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1357 MFI->RemoveStackObject(SecondSlot);
1358 }
1359 }
1360 }
1361 }// While changed.
1362
1363 // Record statistics.
1364 StackSpaceSaved += ReducedSize;
1365 StackSlotMerged += RemovedSlots;
1366 LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1367 << ReducedSize << " bytes\n");
1368
1369 // Scan the entire function and update all machine operands that use frame
1370 // indices to use the remapped frame index.
1371 if (!SlotRemap.empty()) {
1372 expungeSlotMap(SlotRemap, NumSlots);
1373 remapInstructions(SlotRemap);
1374 }
1375
1376 return removeAllMarkers();
1377}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
This defines the Use class.
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
std::pair< uint64_t, uint64_t > Interval
This file contains the declarations for metadata subclasses.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
R600 Emit Clause Markers
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static int getStartOrEndSlot(const MachineInstr &MI)
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
Merge disjoint stack slots
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
PointerType * getType() const
Overload to return most specific pointer type.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & reset()
Definition BitVector.h:392
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition BitVector.h:335
BitVector & set()
Definition BitVector.h:351
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:159
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it,...
Definition Allocator.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
bool empty() const
Definition DenseMap.h:107
iterator end()
Definition DenseMap.h:81
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
LLVM_ABI bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
bool empty() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
@ SSPLK_LargeArray
Array or nested array >= SSP-buffer-size.
@ SSPLK_AddrOf
The address of this allocation is exposed and triggered protection.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void print(raw_ostream &os) const
Print this index to the given raw_ostream.
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BumpPtrAllocator Allocator
static LLVM_ABI void handleRAUW(Value *From, Value *To)
Definition Metadata.cpp:545
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition Value.h:568
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
self_iterator getIterator()
Definition ilist_node.h:134
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
bool empty() const
Definition BasicBlock.h:101
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2040
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2454
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1632
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
LLVM_ABI char & StackColoringLegacyID
StackSlotColoring - This pass performs stack coloring and merging.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
SmallVector< WinEHHandlerType, 1 > HandlerArray