LLVM 22.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
88#include "llvm/MC/SectionKind.h"
89#include "llvm/Pass.h"
92#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstddef>
100#include <cstdint>
101#include <string>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "global-merge"
107
108// FIXME: This is only useful as a last-resort way to disable the pass.
109static cl::opt<bool>
110EnableGlobalMerge("enable-global-merge", cl::Hidden,
111 cl::desc("Enable the global merge pass"),
112 cl::init(true));
113
115GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
116 cl::desc("Set maximum offset for global merge pass"),
117 cl::init(0));
118
120 "global-merge-group-by-use", cl::Hidden,
121 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
122
124 "global-merge-all-const", cl::Hidden,
125 cl::desc("Merge all const globals without looking at uses"),
126 cl::init(false));
127
129 "global-merge-ignore-single-use", cl::Hidden,
130 cl::desc("Improve global merge pass to ignore globals only used alone"),
131 cl::init(true));
132
133static cl::opt<bool>
134EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
135 cl::desc("Enable global merge pass on constants"),
136 cl::init(false));
137
138// FIXME: this could be a transitional option, and we probably need to remove
139// it if only we are sure this optimization could always benefit all targets.
141EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
142 cl::desc("Enable global merge pass on external linkage"));
143
145 GlobalMergeMinDataSize("global-merge-min-data-size",
146 cl::desc("The minimum size in bytes of each global "
147 "that should considered in merging."),
148 cl::init(0), cl::Hidden);
149
150STATISTIC(NumMerged, "Number of globals merged");
151
152namespace {
153
154class GlobalMergeImpl {
155 const TargetMachine *TM = nullptr;
157 bool IsMachO = false;
158
159private:
160 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
161 bool isConst, unsigned AddrSpace) const;
162
163 /// Merge everything in \p Globals for which the corresponding bit
164 /// in \p GlobalSet is set.
165 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
166 const BitVector &GlobalSet, Module &M, bool isConst,
167 unsigned AddrSpace) const;
168
169 /// Check if the given variable has been identified as must keep
170 /// \pre setMustKeepGlobalVariables must have been called on the Module that
171 /// contains GV
172 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
173 return MustKeepGlobalVariables.count(GV);
174 }
175
176 /// Collect every variables marked as "used" or used in a landing pad
177 /// instruction for this Module.
178 void setMustKeepGlobalVariables(Module &M);
179
180 /// Collect every variables marked as "used"
181 void collectUsedGlobalVariables(Module &M, StringRef Name);
182
183 /// Keep track of the GlobalVariable that must not be merged away
184 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
185
186public:
187 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
188 : TM(TM), Opt(Opt) {}
189 bool run(Module &M);
190};
191
192class GlobalMerge : public FunctionPass {
193 const TargetMachine *TM = nullptr;
194 GlobalMergeOptions Opt;
195
196public:
197 static char ID; // Pass identification, replacement for typeid.
198
199 explicit GlobalMerge() : FunctionPass(ID) {
200 Opt.MaxOffset = GlobalMergeMaxOffset;
201 Opt.MergeConstantGlobals = EnableGlobalMergeOnConst;
202 Opt.MergeConstAggressive = GlobalMergeAllConst;
204 }
205
206 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
207 bool OnlyOptimizeForSize, bool MergeExternalGlobals,
208 bool MergeConstantGlobals, bool MergeConstAggressive)
209 : FunctionPass(ID), TM(TM) {
210 Opt.MaxOffset = MaximalOffset;
211 Opt.SizeOnly = OnlyOptimizeForSize;
212 Opt.MergeExternal = MergeExternalGlobals;
213 Opt.MergeConstantGlobals = MergeConstantGlobals;
214 Opt.MergeConstAggressive = MergeConstAggressive;
216 }
217
218 bool doInitialization(Module &M) override {
219 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
220 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
221 if (!SDL)
222 return std::nullopt;
223 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
224 };
225 if (GlobalMergeMinDataSize.getNumOccurrences())
226 Opt.MinSize = GlobalMergeMinDataSize;
227 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
228 Opt.MinSize = *SDL + 1;
229 else
230 Opt.MinSize = 0;
231
232 GlobalMergeImpl P(TM, Opt);
233 return P.run(M);
234 }
235 bool runOnFunction(Function &F) override { return false; }
236
237 StringRef getPassName() const override { return "Merge internal globals"; }
238
239 void getAnalysisUsage(AnalysisUsage &AU) const override {
240 AU.setPreservesCFG();
241 FunctionPass::getAnalysisUsage(AU);
242 }
243};
244
245} // end anonymous namespace
246
248 GlobalMergeImpl P(TM, Options);
249 bool Changed = P.run(M);
250 if (!Changed)
251 return PreservedAnalyses::all();
252
255 return PA;
256}
257
258char GlobalMerge::ID = 0;
259
260INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
261
262bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
263 Module &M, bool isConst,
264 unsigned AddrSpace) const {
265 auto &DL = M.getDataLayout();
266 // FIXME: Find better heuristics
268 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
269 // We don't support scalable global variables.
270 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
271 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
272 });
273
274 // If we want to just blindly group all globals together, do so.
275 if (!GlobalMergeGroupByUse || (Opt.MergeConstAggressive && isConst)) {
276 BitVector AllGlobals(Globals.size(), true);
277 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
278 }
279
280 // If we want to be smarter, look at all uses of each global, to try to
281 // discover all sets of globals used together, and how many times each of
282 // these sets occurred.
283 //
284 // Keep this reasonably efficient, by having an append-only list of all sets
285 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
286 // code (currently, a Function) to the set of globals seen so far that are
287 // used together in that unit (GlobalUsesByFunction).
288 //
289 // When we look at the Nth global, we know that any new set is either:
290 // - the singleton set {N}, containing this global only, or
291 // - the union of {N} and a previously-discovered set, containing some
292 // combination of the previous N-1 globals.
293 // Using that knowledge, when looking at the Nth global, we can keep:
294 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
295 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
296 // if it actually occurs.
297
298 // We keep track of the sets of globals used together "close enough".
299 struct UsedGlobalSet {
300 BitVector Globals;
301 unsigned UsageCount = 1;
302
303 UsedGlobalSet(size_t Size) : Globals(Size) {}
304 };
305
306 // Each set is unique in UsedGlobalSets.
307 std::vector<UsedGlobalSet> UsedGlobalSets;
308
309 // Avoid repeating the create-global-set pattern.
310 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
311 UsedGlobalSets.emplace_back(Globals.size());
312 return UsedGlobalSets.back();
313 };
314
315 // The first set is the empty set.
316 CreateGlobalSet().UsageCount = 0;
317
318 // We define "close enough" to be "in the same function".
319 // FIXME: Grouping uses by function is way too aggressive, so we should have
320 // a better metric for distance between uses.
321 // The obvious alternative would be to group by BasicBlock, but that's in
322 // turn too conservative..
323 // Anything in between wouldn't be trivial to compute, so just stick with
324 // per-function grouping.
325
326 // The value type is an index into UsedGlobalSets.
327 // The default (0) conveniently points to the empty set.
328 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
329
330 // Now, look at each merge-eligible global in turn.
331
332 // Keep track of the sets we already encountered to which we added the
333 // current global.
334 // Each element matches the same-index element in UsedGlobalSets.
335 // This lets us efficiently tell whether a set has already been expanded to
336 // include the current global.
337 std::vector<size_t> EncounteredUGS;
338
339 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
340 GlobalVariable *GV = Globals[GI];
341
342 // Reset the encountered sets for this global and grow it in case we created
343 // new sets for the previous global.
344 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
345
346 // We might need to create a set that only consists of the current global.
347 // Keep track of its index into UsedGlobalSets.
348 size_t CurGVOnlySetIdx = 0;
349
350 // For each global, look at all its Uses.
351 for (auto &U : GV->uses()) {
352 // This Use might be a ConstantExpr. We're interested in Instruction
353 // users, so look through ConstantExpr...
354 Use *UI, *UE;
355 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
356 if (CE->use_empty())
357 continue;
358 UI = &*CE->use_begin();
359 UE = nullptr;
360 } else if (isa<Instruction>(U.getUser())) {
361 UI = &U;
362 UE = UI->getNext();
363 } else {
364 continue;
365 }
366
367 // ...to iterate on all the instruction users of the global.
368 // Note that we iterate on Uses and not on Users to be able to getNext().
369 for (; UI != UE; UI = UI->getNext()) {
370 Instruction *I = dyn_cast<Instruction>(UI->getUser());
371 if (!I)
372 continue;
373
374 Function *ParentFn = I->getParent()->getParent();
375
376 // If we're only optimizing for size, ignore non-minsize functions.
377 if (Opt.SizeOnly && !ParentFn->hasMinSize())
378 continue;
379
380 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
381
382 // If this is the first global the function uses, map it to the set
383 // consisting of this global only.
384 if (!UGSIdx) {
385 // If that set doesn't exist yet, create it.
386 if (!CurGVOnlySetIdx) {
387 CurGVOnlySetIdx = UsedGlobalSets.size();
388 CreateGlobalSet().Globals.set(GI);
389 } else {
390 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
391 }
392
393 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
394 continue;
395 }
396
397 // If we already encountered a use of this global in this function, just
398 // increment the counter.
399 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
400 ++UsedGlobalSets[UGSIdx].UsageCount;
401 continue;
402 }
403
404 // If not, the previous set wasn't actually used in this function.
405 --UsedGlobalSets[UGSIdx].UsageCount;
406
407 // If we already expanded the previous set to include this global, just
408 // reuse that expanded set.
409 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
410 ++UsedGlobalSets[ExpandedIdx].UsageCount;
411 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
412 continue;
413 }
414
415 // If not, create a new set consisting of the union of the previous set
416 // and this global. Mark it as encountered, so we can reuse it later.
417 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
418 UsedGlobalSets.size();
419
420 UsedGlobalSet &NewUGS = CreateGlobalSet();
421 NewUGS.Globals.set(GI);
422 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
423 }
424 }
425 }
426
427 // We can choose to merge all globals together, but ignore globals never used
428 // with another global. This catches the obviously non-profitable cases of
429 // having a single global, but is aggressive enough for any other case.
431 BitVector AllGlobals(Globals.size());
432 for (const UsedGlobalSet &UGS : UsedGlobalSets) {
433 if (UGS.UsageCount == 0)
434 continue;
435 if (UGS.Globals.count() > 1)
436 AllGlobals |= UGS.Globals;
437 }
438 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
439 }
440
441 // Now we found a bunch of sets of globals used together. We accumulated
442 // the number of times we encountered the sets (i.e., the number of functions
443 // that use that exact set of globals).
444 //
445 // Multiply that by the size of the set to give us a crude profitability
446 // metric.
447 llvm::stable_sort(UsedGlobalSets,
448 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
449 return UGS1.Globals.count() * UGS1.UsageCount <
450 UGS2.Globals.count() * UGS2.UsageCount;
451 });
452
453 // Starting from the sets with the best (=biggest) profitability, find a
454 // good combination.
455 // The ideal (and expensive) solution can only be found by trying all
456 // combinations, looking for the one with the best profitability.
457 // Don't be smart about it, and just pick the first compatible combination,
458 // starting with the sets with the best profitability.
459 BitVector PickedGlobals(Globals.size());
460 bool Changed = false;
461
462 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
463 if (UGS.UsageCount == 0)
464 continue;
465 if (PickedGlobals.anyCommon(UGS.Globals))
466 continue;
467 PickedGlobals |= UGS.Globals;
468 // If the set only contains one global, there's no point in merging.
469 // Ignore the global for inclusion in other sets though, so keep it in
470 // PickedGlobals.
471 if (UGS.Globals.count() < 2)
472 continue;
473 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
474 }
475
476 return Changed;
477}
478
479bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
480 const BitVector &GlobalSet, Module &M,
481 bool isConst, unsigned AddrSpace) const {
482 assert(Globals.size() > 1);
483
484 Type *Int32Ty = Type::getInt32Ty(M.getContext());
485 Type *Int8Ty = Type::getInt8Ty(M.getContext());
486 auto &DL = M.getDataLayout();
487
488 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
489 << GlobalSet.find_first() << ", total of " << Globals.size()
490 << "\n");
491
492 bool Changed = false;
493 ssize_t i = GlobalSet.find_first();
494 while (i != -1) {
495 ssize_t j = 0;
496 uint64_t MergedSize = 0;
497 std::vector<Type*> Tys;
498 std::vector<Constant*> Inits;
499 std::vector<unsigned> StructIdxs;
500
501 bool HasExternal = false;
502 StringRef FirstExternalName;
503 Align MaxAlign;
504 unsigned CurIdx = 0;
505 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
506 Type *Ty = Globals[j]->getValueType();
507
508 // Make sure we use the same alignment AsmPrinter would use.
509 Align Alignment = DL.getPreferredAlign(Globals[j]);
510 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
511 MergedSize += Padding;
512 MergedSize += DL.getTypeAllocSize(Ty);
513 if (MergedSize > Opt.MaxOffset) {
514 break;
515 }
516 if (Padding) {
517 Tys.push_back(ArrayType::get(Int8Ty, Padding));
518 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
519 ++CurIdx;
520 }
521 Tys.push_back(Ty);
522 Inits.push_back(Globals[j]->getInitializer());
523 StructIdxs.push_back(CurIdx++);
524
525 MaxAlign = std::max(MaxAlign, Alignment);
526
527 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
528 HasExternal = true;
529 FirstExternalName = Globals[j]->getName();
530 }
531 }
532
533 // Exit early if there is only one global to merge.
534 if (Tys.size() < 2) {
535 i = j;
536 continue;
537 }
538
539 // If merged variables doesn't have external linkage, we needn't to expose
540 // the symbol after merging.
544 // Use a packed struct so we can control alignment.
545 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
546 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
547
548 // On Darwin external linkage needs to be preserved, otherwise
549 // dsymutil cannot preserve the debug info for the merged
550 // variables. If they have external linkage, use the symbol name
551 // of the first variable merged as the suffix of global symbol
552 // name. This avoids a link-time naming conflict for the
553 // _MergedGlobals symbols.
554 Twine MergedName =
555 (IsMachO && HasExternal)
556 ? "_MergedGlobals_" + FirstExternalName
557 : "_MergedGlobals";
558 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
559 auto *MergedGV = new GlobalVariable(
560 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
562
563 MergedGV->setAlignment(MaxAlign);
564 MergedGV->setSection(Globals[i]->getSection());
565
566 LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
567
568 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
569 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
570 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
571 std::string Name(Globals[k]->getName());
572 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
574 Globals[k]->getDLLStorageClass();
575
576 // Copy metadata while adjusting any debug info metadata by the original
577 // global's offset within the merged global.
578 MergedGV->copyMetadata(Globals[k],
579 MergedLayout->getElementOffset(StructIdxs[idx]));
580
581 Constant *Idx[2] = {
582 ConstantInt::get(Int32Ty, 0),
583 ConstantInt::get(Int32Ty, StructIdxs[idx]),
584 };
585 Constant *GEP =
586 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
587 Globals[k]->replaceAllUsesWith(GEP);
588 Globals[k]->eraseFromParent();
589
590 // Emit an alias for the original variable name. This is necessary for an
591 // external symbol, as it may be accessed from another object. For
592 // internal symbols, it's not strictly required, but it's useful.
593 //
594 // This _should_ also work on Mach-O ever since '.alt_entry' support was
595 // added in 2016. Unfortunately, there's a bug in ld-prime (present at
596 // least from Xcode 15.0 through Xcode 16.0), in which -dead_strip doesn't
597 // always honor alt_entry. To workaround this issue, we don't emit aliases
598 // on Mach-O. Except, we _must_ do so for external symbols. That means
599 // MergeExternal is broken with that linker. (That option is currently off
600 // by default on MachO).
601 if (!IsMachO || Linkage == GlobalValue::ExternalLinkage) {
602 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
603 Linkage, Name, GEP, &M);
604 GA->setVisibility(Visibility);
605 GA->setDLLStorageClass(DLLStorage);
606 }
607
608 NumMerged++;
609 }
610 Changed = true;
611 i = j;
612 }
613
614 return Changed;
615}
616
617void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
618 // Extract global variables from llvm.used array
619 const GlobalVariable *GV = M.getGlobalVariable(Name);
620 if (!GV || !GV->hasInitializer()) return;
621
622 // Should be an array of 'i8*'.
623 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
624
625 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
626 if (const GlobalVariable *G =
628 MustKeepGlobalVariables.insert(G);
629}
630
631void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
632 collectUsedGlobalVariables(M, "llvm.used");
633 collectUsedGlobalVariables(M, "llvm.compiler.used");
634
635 for (Function &F : M) {
636 for (BasicBlock &BB : F) {
637 BasicBlock::iterator Pad = BB.getFirstNonPHIIt();
638 auto *II = dyn_cast<IntrinsicInst>(Pad);
639 if (!Pad->isEHPad() &&
640 !(II && II->getIntrinsicID() == Intrinsic::eh_typeid_for))
641 continue;
642
643 // Keep globals used by landingpads, catchpads,
644 // or intrinsics that require a plain global.
645 for (const Use &U : Pad->operands()) {
646 if (const GlobalVariable *GV =
647 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
648 MustKeepGlobalVariables.insert(GV);
649 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
650 for (const Use &Elt : CA->operands()) {
651 if (const GlobalVariable *GV =
652 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
653 MustKeepGlobalVariables.insert(GV);
654 }
655 }
656 }
657 }
658 }
659}
660
661// This function returns true if the given data Section name has custom
662// subsection-splitting semantics in Mach-O (such as splitting by a fixed size)
663//
664// See also ObjFile::parseSections and getRecordSize in lld/MachO/InputFiles.cpp
665static bool isSpecialMachOSection(StringRef Section) {
666 // Uses starts_with, since section attributes can appear at the end of the
667 // name.
668 return Section.starts_with("__DATA,__cfstring") ||
669 Section.starts_with("__DATA,__objc_classrefs") ||
670 Section.starts_with("__DATA,__objc_selrefs");
671}
672
673bool GlobalMergeImpl::run(Module &M) {
675 return false;
676
677 IsMachO = M.getTargetTriple().isOSBinFormatMachO();
678
679 auto &DL = M.getDataLayout();
681 Globals, ConstGlobals, BSSGlobals;
682 bool Changed = false;
683 setMustKeepGlobalVariables(M);
684
685 LLVM_DEBUG({
686 dbgs() << "Number of GV that must be kept: " <<
687 MustKeepGlobalVariables.size() << "\n";
688 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
689 dbgs() << "Kept: " << *KeptGV << "\n";
690 });
691 // Grab all non-const globals.
692 for (auto &GV : M.globals()) {
693 // Merge is safe for "normal" internal or external globals only
694 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
695 continue;
696
697 // It's not safe to merge globals that may be preempted
698 if (TM && !TM->shouldAssumeDSOLocal(&GV))
699 continue;
700
701 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
702 !GV.hasLocalLinkage())
703 continue;
704
706 assert(PT && "Global variable is not a pointer!");
707
708 unsigned AddressSpace = PT->getAddressSpace();
710
711 // On Mach-O, some section names have special semantics. Don't merge these.
712 if (IsMachO && isSpecialMachOSection(Section))
713 continue;
714
715 // Ignore all 'special' globals.
716 if (GV.getName().starts_with("llvm.") ||
717 GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
718 continue;
719
720 // Ignore all "required" globals:
721 if (isMustKeepGlobalVariable(&GV))
722 continue;
723
724 // Don't merge tagged globals, as each global should have its own unique
725 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
726 // with compatible alignment and the same contents may be merged as long as
727 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
728 // size_b / 16`).
729 if (GV.isTagged())
730 continue;
731
732 Type *Ty = GV.getValueType();
733 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
734 bool CanMerge = AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize;
735 if (CanMerge) {
736 if (TM &&
738 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
739 else if (GV.isConstant())
740 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
741 else
742 Globals[{AddressSpace, Section}].push_back(&GV);
743 }
744 LLVM_DEBUG(dbgs() << "GV " << (CanMerge ? "" : "not ") << "to merge: " << GV
745 << "\n");
746 }
747
748 for (auto &P : Globals)
749 if (P.second.size() > 1)
750 Changed |= doMerge(P.second, M, false, P.first.first);
751
752 for (auto &P : BSSGlobals)
753 if (P.second.size() > 1)
754 Changed |= doMerge(P.second, M, false, P.first.first);
755
756 if (Opt.MergeConstantGlobals)
757 for (auto &P : ConstGlobals)
758 if (P.second.size() > 1)
759 Changed |= doMerge(P.second, M, true, P.first.first);
760
761 return Changed;
762}
763
765 bool OnlyOptimizeForSize,
766 bool MergeExternalByDefault,
767 bool MergeConstantByDefault,
768 bool MergeConstAggressiveByDefault) {
769 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
770 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
771 bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
772 bool MergeConstAggressive = GlobalMergeAllConst.getNumOccurrences() > 0
774 : MergeConstAggressiveByDefault;
775 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
776 MergeConstant, MergeConstAggressive);
777}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Finalize Linkage
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static bool isSpecialMachOSection(StringRef Section)
static cl::opt< bool > GlobalMergeAllConst("global-merge-all-const", cl::Hidden, cl::desc("Merge all const globals without looking at uses"), cl::init(false))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static StringRef getName(Value *V)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:300
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
ConstantArray - Constant Array Declarations.
Definition Constants.h:433
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:585
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasExternalLinkage() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:316
bool hasLocalLinkage() const
bool isTagged() const
void setDLLStorageClass(DLLStorageClassTypes C)
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition GlobalValue.h:74
Module * getParent()
Get the module that this global value is contained inside of...
PointerType * getType() const
Global values are always pointers.
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition GlobalValue.h:67
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
Class to represent pointers.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition DataLayout.h:621
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:652
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
Changed
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition ELF.h:539
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2040
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
LLVM_ABI Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false, bool MergeConstantByDefault=false, bool MergeConstAggressiveByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
LLVM_ABI void initializeGlobalMergePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition Module.cpp:865
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
bool MergeConstAggressive
Whether we should merge constant global variables aggressively without looking at use.
Definition GlobalMerge.h:34
bool SizeOnly
Whether we should try to optimize for size only.
Definition GlobalMerge.h:39
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition GlobalMerge.h:29
bool MergeConstantGlobals
Whether we should merge constant global variables.
Definition GlobalMerge.h:31