197#include "llvm/IR/IntrinsicsAMDGPU.h"
214#define DEBUG_TYPE "amdgpu-lower-module-lds"
222 "amdgpu-super-align-lds-globals",
223 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
226enum class LoweringKind { module, table, kernel, hybrid };
228 "amdgpu-lower-module-lds-strategy",
232 clEnumValN(LoweringKind::table,
"table",
"Lower via table lookup"),
233 clEnumValN(LoweringKind::module,
"module",
"Lower via module struct"),
235 LoweringKind::kernel,
"kernel",
236 "Lower variables reachable from one kernel, otherwise abort"),
238 "Lower via mixture of above strategies")));
240template <
typename T> std::vector<T> sortByName(std::vector<T> &&V) {
241 llvm::sort(V, [](
const auto *L,
const auto *R) {
242 return L->getName() < R->getName();
244 return {std::move(V)};
247class AMDGPULowerModuleLDS {
251 removeLocalVarsFromUsedLists(
Module &M,
263 LocalVar->removeDeadConstantUsers();
288 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
291 Func->getParent(), Intrinsic::donothing, {});
293 Value *UseInstance[1] = {
294 Builder.CreateConstInBoundsGEP1_32(SGV->
getValueType(), SGV, 0)};
303 struct LDSVariableReplacement {
313 static Constant *getAddressesOfVariablesInKernel(
327 auto ConstantGepIt = LDSVarsToConstantGEP.
find(GV);
328 if (ConstantGepIt != LDSVarsToConstantGEP.
end()) {
330 Elements.push_back(elt);
342 if (Variables.
empty()) {
347 const size_t NumberVariables = Variables.
size();
348 const size_t NumberKernels = kernels.
size();
357 std::vector<Constant *> overallConstantExprElts(NumberKernels);
358 for (
size_t i = 0; i < NumberKernels; i++) {
359 auto Replacement = KernelToReplacement.
find(kernels[i]);
360 overallConstantExprElts[i] =
361 (Replacement == KernelToReplacement.
end())
363 : getAddressesOfVariablesInKernel(
364 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
379 Value *OptionalIndex) {
385 Value *tableKernelIndex = getTableLookupKernelIndex(M,
I->getFunction());
391 Builder.SetInsertPoint(
I);
395 ConstantInt::get(I32, 0),
401 Value *Address = Builder.CreateInBoundsGEP(
402 LookupTable->getValueType(), LookupTable, GEPIdx, GV->
getName());
404 Value *loaded = Builder.CreateLoad(I32, Address);
412 void replaceUsesInInstructionsWithTableLookup(
420 for (
size_t Index = 0; Index < ModuleScopeVariables.
size(); Index++) {
421 auto *GV = ModuleScopeVariables[Index];
428 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
429 ConstantInt::get(I32, Index));
440 if (VariableSet.
empty())
443 for (
Function &Func : M.functions()) {
458 chooseBestVariableForModuleStrategy(
const DataLayout &
DL,
464 size_t UserCount = 0;
467 CandidateTy() =
default;
470 : GV(GV), UserCount(UserCount),
Size(AllocSize) {}
474 if (UserCount <
Other.UserCount) {
477 if (UserCount >
Other.UserCount) {
495 CandidateTy MostUsed;
497 for (
auto &K : LDSVars) {
499 if (K.second.size() <= 1) {
504 CandidateTy Candidate(
507 if (MostUsed < Candidate)
508 MostUsed = Candidate;
532 auto [It, Inserted] = tableKernelIndexCache.
try_emplace(
F);
534 auto InsertAt =
F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
537 It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
543 static std::vector<Function *> assignLDSKernelIDToEachKernel(
551 std::vector<Function *> OrderedKernels;
552 if (!KernelsThatAllocateTableLDS.
empty() ||
553 !KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
555 for (
Function &Func : M->functions()) {
556 if (Func.isDeclaration())
561 if (KernelsThatAllocateTableLDS.
contains(&Func) ||
562 KernelsThatIndirectlyAllocateDynamicLDS.
contains(&Func)) {
564 OrderedKernels.push_back(&Func);
569 OrderedKernels = sortByName(std::move(OrderedKernels));
575 if (OrderedKernels.size() > UINT32_MAX) {
580 for (
size_t i = 0; i < OrderedKernels.size(); i++) {
584 OrderedKernels[i]->setMetadata(
"llvm.amdgcn.lds.kernel.id",
588 return OrderedKernels;
591 static void partitionVariablesIntoIndirectStrategies(
600 LoweringKindLoc != LoweringKind::hybrid
602 : chooseBestVariableForModuleStrategy(
603 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
608 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
611 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
617 assert(K.second.size() != 0);
620 DynamicVariables.
insert(GV);
624 switch (LoweringKindLoc) {
625 case LoweringKind::module:
626 ModuleScopeVariables.insert(GV);
629 case LoweringKind::table:
630 TableLookupVariables.
insert(GV);
633 case LoweringKind::kernel:
634 if (K.second.size() == 1) {
635 KernelAccessVariables.
insert(GV);
639 "cannot lower LDS '" + GV->
getName() +
640 "' to kernel access as it is reachable from multiple kernels");
644 case LoweringKind::hybrid: {
645 if (GV == HybridModuleRoot) {
646 assert(K.second.size() != 1);
647 ModuleScopeVariables.insert(GV);
648 }
else if (K.second.size() == 1) {
649 KernelAccessVariables.
insert(GV);
650 }
else if (
set_is_subset(K.second, HybridModuleRootKernels)) {
651 ModuleScopeVariables.insert(GV);
653 TableLookupVariables.
insert(GV);
662 assert(ModuleScopeVariables.
size() + TableLookupVariables.
size() +
663 KernelAccessVariables.
size() + DynamicVariables.
size() ==
664 LDSToKernelsThatNeedToAccessItIndirectly.size());
677 if (ModuleScopeVariables.
empty()) {
683 LDSVariableReplacement ModuleScopeReplacement =
684 createLDSVariableReplacement(M,
"llvm.amdgcn.module.lds",
685 ModuleScopeVariables);
693 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
696 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
699 replaceLDSVariablesWithStruct(
700 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
713 for (
Function &Func : M.functions()) {
717 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
718 replaceLDSVariablesWithStruct(
719 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
728 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
732 return ModuleScopeReplacement.SGV;
736 lowerKernelScopeStructVariables(
745 for (
Function &Func : M.functions()) {
754 KernelUsedVariables.
insert(v);
762 KernelUsedVariables.
insert(v);
768 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
770 KernelUsedVariables.
erase(v);
774 if (KernelUsedVariables.
empty()) {
786 if (!Func.hasName()) {
790 std::string VarName =
791 (
Twine(
"llvm.amdgcn.kernel.") + Func.getName() +
".lds").str();
794 createLDSVariableReplacement(M, VarName, KernelUsedVariables);
802 markUsedByKernel(&Func, Replacement.SGV);
805 removeLocalVarsFromUsedLists(M, KernelUsedVariables);
806 KernelToReplacement[&Func] = Replacement;
809 replaceLDSVariablesWithStruct(
810 M, KernelUsedVariables, Replacement, [&Func](
Use &U) {
812 return I &&
I->getFunction() == &Func;
815 return KernelToReplacement;
835 Align MaxDynamicAlignment(1);
839 MaxDynamicAlignment =
845 UpdateMaxAlignment(GV);
849 UpdateMaxAlignment(GV);
858 N->setAlignment(MaxDynamicAlignment);
868 std::vector<Function *>
const &OrderedKernels) {
870 if (!KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
875 std::vector<Constant *> newDynamicLDS;
878 for (
auto &
func : OrderedKernels) {
880 if (KernelsThatIndirectlyAllocateDynamicLDS.
contains(
func)) {
882 if (!
func->hasName()) {
887 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo,
func);
889 KernelToCreatedDynamicLDS[
func] =
N;
891 markUsedByKernel(
func,
N);
895 emptyCharArray,
N, ConstantInt::get(I32, 0),
true);
901 assert(OrderedKernels.size() == newDynamicLDS.size());
907 "llvm.amdgcn.dynlds.offset.table",
nullptr,
918 replaceUseWithTableLookup(M, Builder, table, GV, U,
nullptr);
922 return KernelToCreatedDynamicLDS;
927 bool NeedsReplacement =
false;
932 NeedsReplacement =
true;
937 if (!NeedsReplacement)
949 U.getUser()->replaceUsesOfWith(GV, NewGV);
956 bool lowerSpecialLDSVariables(
962 int NumAbsolutes = 0;
963 std::vector<GlobalVariable *> OrderedGVs;
964 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
971 if (LDSToKernelsThatNeedToAccessItIndirectly[GV].
size() > 1) {
972 OrderedGVs.push_back(GV);
978 LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);
980 OrderedGVs = sortByName(std::move(OrderedGVs));
983 unsigned BarId = NumAbsolutes + 1;
985 NumAbsolutes += BarCnt;
989 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
990 recordLDSAbsoluteAddress(&M, GV,
Offset);
997 std::vector<Function *> OrderedKernels;
1001 OrderedKernels.push_back(
F);
1003 OrderedKernels = sortByName(std::move(OrderedKernels));
1016 OrderedGVs.push_back(GV);
1018 OrderedGVs = sortByName(std::move(OrderedGVs));
1022 auto NewGV = uniquifyGVPerKernel(M, GV,
F);
1025 unsigned BarId = Kernel2BarId[
F];
1026 BarId += NumAbsolutes + 1;
1028 Kernel2BarId[
F] += BarCnt;
1029 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
1030 recordLDSAbsoluteAddress(&M, NewGV,
Offset);
1045 bool runOnModule(
Module &M) {
1047 bool Changed = superAlignLDSGlobals(M);
1063 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(
F);
1069 Changed |= lowerSpecialLDSVariables(
1070 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);
1078 partitionVariablesIntoIndirectStrategies(
1079 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
1080 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
1087 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1088 ModuleScopeVariables);
1090 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1091 TableLookupVariables);
1094 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1097 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
1098 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
1101 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
1102 KernelsThatAllocateModuleLDS,
1103 MaybeModuleScopeStruct);
1106 for (
auto &GV : KernelAccessVariables) {
1107 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
1108 assert(funcs.size() == 1);
1109 LDSVariableReplacement Replacement =
1110 KernelToReplacement[*(funcs.begin())];
1115 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](
Use &U) {
1121 std::vector<Function *> OrderedKernels =
1122 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
1123 KernelsThatIndirectlyAllocateDynamicLDS);
1125 if (!KernelsThatAllocateTableLDS.
empty()) {
1131 auto TableLookupVariablesOrdered =
1132 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.
begin(),
1133 TableLookupVariables.
end()));
1136 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
1137 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
1142 lowerDynamicLDSVariables(M, LDSUsesInfo,
1143 KernelsThatIndirectlyAllocateDynamicLDS,
1144 DynamicVariables, OrderedKernels);
1149 for (
auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1150 &KernelsThatAllocateTableLDS})
1159 for (
Function &Func : M.functions()) {
1174 const bool AllocateModuleScopeStruct =
1175 MaybeModuleScopeStruct &&
1176 KernelsThatAllocateModuleLDS.
contains(&Func);
1178 auto Replacement = KernelToReplacement.
find(&Func);
1179 const bool AllocateKernelScopeStruct =
1180 Replacement != KernelToReplacement.
end();
1182 const bool AllocateDynamicVariable =
1183 KernelToCreatedDynamicLDS.
contains(&Func);
1187 if (AllocateModuleScopeStruct) {
1193 if (AllocateKernelScopeStruct) {
1196 recordLDSAbsoluteAddress(&M, KernelStruct,
Offset);
1204 if (AllocateDynamicVariable) {
1205 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
1207 recordLDSAbsoluteAddress(&M, DynamicVariable,
Offset);
1222 if (AllocateDynamicVariable)
1225 Func.addFnAttr(
"amdgpu-lds-size", Buffer);
1244 static bool superAlignLDSGlobals(
Module &M) {
1247 if (!SuperAlignLDSGlobals) {
1251 for (
auto &GV : M.globals()) {
1271 Alignment = std::max(Alignment,
Align(16));
1272 }
else if (GVSize > 4) {
1274 Alignment = std::max(Alignment,
Align(8));
1275 }
else if (GVSize > 2) {
1277 Alignment = std::max(Alignment,
Align(4));
1278 }
else if (GVSize > 1) {
1280 Alignment = std::max(Alignment,
Align(2));
1291 static LDSVariableReplacement createLDSVariableReplacement(
1292 Module &M, std::string VarName,
1309 auto Sorted = sortByName(std::vector<GlobalVariable *>(
1310 LDSVarsToTransform.
begin(), LDSVarsToTransform.
end()));
1322 std::vector<GlobalVariable *> LocalVars;
1324 LocalVars.reserve(LDSVarsToTransform.
size());
1325 IsPaddingField.
reserve(LDSVarsToTransform.
size());
1328 for (
auto &
F : LayoutFields) {
1331 Align DataAlign =
F.Alignment;
1334 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
1335 uint64_t Padding = DataAlignV - Rem;
1347 CurrentOffset += Padding;
1350 LocalVars.push_back(FGV);
1352 CurrentOffset +=
F.Size;
1356 std::vector<Type *> LocalVarTypes;
1357 LocalVarTypes.reserve(LocalVars.size());
1359 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
1374 for (
size_t I = 0;
I < LocalVars.size();
I++) {
1376 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32,
I)};
1378 if (IsPaddingField[
I]) {
1385 assert(Map.size() == LDSVarsToTransform.
size());
1386 return {SGV, std::move(Map)};
1389 template <
typename PredicateTy>
1390 static void replaceLDSVariablesWithStruct(
1392 const LDSVariableReplacement &Replacement, PredicateTy
Predicate) {
1399 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
1400 LDSVarsToTransformArg.
begin(), LDSVarsToTransformArg.
end()));
1406 const size_t NumberVars = LDSVarsToTransform.
size();
1407 if (NumberVars > 1) {
1409 AliasScopes.
reserve(NumberVars);
1411 for (
size_t I = 0;
I < NumberVars;
I++) {
1415 NoAliasList.
append(&AliasScopes[1], AliasScopes.
end());
1420 for (
size_t I = 0;
I < NumberVars;
I++) {
1422 Constant *
GEP = Replacement.LDSVarsToConstantGEP.at(GV);
1426 APInt APOff(
DL.getIndexTypeSizeInBits(
GEP->getType()), 0);
1427 GEP->stripAndAccumulateInBoundsConstantOffsets(
DL, APOff);
1434 NoAliasList[
I - 1] = AliasScopes[
I - 1];
1440 refineUsesAlignmentAndAA(
GEP,
A,
DL, AliasScope, NoAlias);
1446 MDNode *NoAlias,
unsigned MaxDepth = 5) {
1447 if (!MaxDepth || (
A == 1 && !AliasScope))
1452 for (
User *U :
Ptr->users()) {
1454 if (AliasScope &&
I->mayReadOrWriteMemory()) {
1455 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
1458 I->setMetadata(LLVMContext::MD_alias_scope, AS);
1460 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
1484 if (Intersection.empty()) {
1489 I->setMetadata(LLVMContext::MD_noalias, NA);
1494 LI->setAlignment(std::max(
A, LI->getAlign()));
1498 if (
SI->getPointerOperand() ==
Ptr)
1499 SI->setAlignment(std::max(
A,
SI->getAlign()));
1505 if (AI->getPointerOperand() ==
Ptr)
1506 AI->setAlignment(std::max(
A, AI->getAlign()));
1510 if (AI->getPointerOperand() ==
Ptr)
1511 AI->setAlignment(std::max(
A, AI->getAlign()));
1515 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
1517 if (
GEP->getPointerOperand() ==
Ptr) {
1519 if (
GEP->accumulateConstantOffset(
DL, Off))
1521 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
1527 if (
I->getOpcode() == Instruction::BitCast ||
1528 I->getOpcode() == Instruction::AddrSpaceCast)
1529 refineUsesAlignmentAndAA(
I,
A,
DL, AliasScope, NoAlias, MaxDepth - 1);
1535class AMDGPULowerModuleLDSLegacy :
public ModulePass {
1548 bool runOnModule(
Module &M)
override {
1550 auto &TPC = getAnalysis<TargetPassConfig>();
1559char AMDGPULowerModuleLDSLegacy::ID = 0;
1564 "Lower uses of LDS variables from non-kernel functions",
1568 "Lower uses of LDS variables from non-kernel functions",
1573 return new AMDGPULowerModuleLDSLegacy(TM);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
The basic data container for the call graph of a Module of IR.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LinkageTypes getLinkage() const
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
ThreadLocalMode getThreadLocalMode() const
PointerType * getType() const
Global values are always pointers.
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI void copyAttributesFrom(const GlobalVariable *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool runOnModule(Module &) override
ImmutablePasses are never run.
This is an important class for using LLVM in a threaded context.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
A container for an operand bundle being viewed as a set of values rather than a set of uses.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A simple AA result which uses scoped-noalias metadata to answer queries.
LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains) const
Collect the set of scoped domains relevant to the noalias scopes.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
A raw_ostream that writes to an std::string.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ BARRIER_SCOPE_WORKGROUP
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap
bool isKernelLDS(const Function *F)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A is a subset of B (every element of A is also in B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void sort(IteratorTy Start, IteratorTy End)
char & AMDGPULowerModuleLDSLegacyPassID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B, i.e. the intersection of A and B (the elements present in both sets)
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const AMDGPUTargetMachine & TM
FunctionVariableMap direct_access
FunctionVariableMap indirect_access
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.