clang 22.0.0git
NVPTX.cpp
Go to the documentation of this file.
1//===- NVPTX.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "TargetInfo.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/StringExtras.h"
13#include "llvm/IR/CallingConv.h"
14#include "llvm/IR/IntrinsicsNVPTX.h"
15
16using namespace clang;
17using namespace clang::CodeGen;
18
19//===----------------------------------------------------------------------===//
20// NVPTX ABI Implementation
21//===----------------------------------------------------------------------===//
22
23namespace {
24
25class NVPTXTargetCodeGenInfo;
26
27class NVPTXABIInfo : public ABIInfo {
28 NVPTXTargetCodeGenInfo &CGInfo;
29
30public:
31 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
32 : ABIInfo(CGT), CGInfo(Info) {}
33
34 ABIArgInfo classifyReturnType(QualType RetTy) const;
35 ABIArgInfo classifyArgumentType(QualType Ty) const;
36
37 void computeInfo(CGFunctionInfo &FI) const override;
38 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
39 AggValueSlot Slot) const override;
40 bool isUnsupportedType(QualType T) const;
41 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
42};
43
44class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
45public:
46 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
47 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
48
49 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
50 CodeGen::CodeGenModule &M) const override;
51 bool shouldEmitStaticExternCAliases() const override;
52
53 llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
54 llvm::PointerType *T,
55 QualType QT) const override;
56
57 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
58 // On the device side, surface reference is represented as an object handle
59 // in 64-bit integer.
60 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
61 }
62
63 llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
64 // On the device side, texture reference is represented as an object handle
65 // in 64-bit integer.
66 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
67 }
68
69 bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
70 LValue Src) const override {
71 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
72 return true;
73 }
74
75 bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
76 LValue Src) const override {
77 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
78 return true;
79 }
80
81 unsigned getDeviceKernelCallingConv() const override {
82 return llvm::CallingConv::PTX_Kernel;
83 }
84
85 // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
86 // resulting MDNode to the nvvm.annotations MDNode.
87 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
88 int Operand);
89
90private:
91 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
92 LValue Src) {
93 llvm::Value *Handle = nullptr;
94 llvm::Constant *C =
95 llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
96 // Lookup `addrspacecast` through the constant pointer if any.
97 if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
98 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
99 if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
100 // Load the handle from the specific global variable using
101 // `nvvm.texsurf.handle.internal` intrinsic.
102 Handle = CGF.EmitRuntimeCall(
103 CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
104 {GV->getType()}),
105 {GV}, "texsurf_handle");
106 } else
107 Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
108 CGF.EmitStoreOfScalar(Handle, Dst);
109 }
110};
111
112/// Checks if the type is unsupported directly by the current target.
113bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
114 ASTContext &Context = getContext();
115 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
116 return true;
117 if (!Context.getTargetInfo().hasFloat128Type() &&
118 (T->isFloat128Type() ||
119 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
120 return true;
121 if (const auto *EIT = T->getAs<BitIntType>())
122 return EIT->getNumBits() >
123 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
124 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
125 Context.getTypeSize(T) > 64U)
126 return true;
127 if (const auto *AT = T->getAsArrayTypeUnsafe())
128 return isUnsupportedType(AT->getElementType());
129 const auto *RD = T->getAsRecordDecl();
130 if (!RD)
131 return false;
132
133 // If this is a C++ record, check the bases first.
134 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
135 for (const CXXBaseSpecifier &I : CXXRD->bases())
136 if (isUnsupportedType(I.getType()))
137 return true;
138
139 for (const FieldDecl *I : RD->fields())
140 if (isUnsupportedType(I->getType()))
141 return true;
142 return false;
143}
144
145/// Coerce the given type into an array with maximum allowed size of elements.
146ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
147 unsigned MaxSize) const {
148 // Alignment and Size are measured in bits.
149 const uint64_t Size = getContext().getTypeSize(Ty);
150 const uint64_t Alignment = getContext().getTypeAlign(Ty);
151 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
152 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
153 const uint64_t NumElements = (Size + Div - 1) / Div;
154 return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
155}
156
157ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
158 if (RetTy->isVoidType())
159 return ABIArgInfo::getIgnore();
160
161 if (getContext().getLangOpts().OpenMP &&
162 getContext().getLangOpts().OpenMPIsTargetDevice &&
163 isUnsupportedType(RetTy))
164 return coerceToIntArrayWithLimit(RetTy, 64);
165
166 // note: this is different from default ABI
167 if (!RetTy->isScalarType())
168 return ABIArgInfo::getDirect();
169
170 // Treat an enum type as its underlying type.
171 if (const auto *ED = RetTy->getAsEnumDecl())
172 RetTy = ED->getIntegerType();
173
174 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
176}
177
178ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
179 // Treat an enum type as its underlying type.
180 if (const auto *ED = Ty->getAsEnumDecl())
181 Ty = ED->getIntegerType();
182
183 // Return aggregates type as indirect by value
184 if (isAggregateTypeForABI(Ty)) {
185 // Under CUDA device compilation, tex/surf builtin types are replaced with
186 // object types and passed directly.
187 if (getContext().getLangOpts().CUDAIsDevice) {
190 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
193 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
194 }
195 return getNaturalAlignIndirect(
196 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
197 /* byval */ true);
198 }
199
200 if (const auto *EIT = Ty->getAs<BitIntType>()) {
201 if ((EIT->getNumBits() > 128) ||
202 (!getContext().getTargetInfo().hasInt128Type() &&
203 EIT->getNumBits() > 64))
204 return getNaturalAlignIndirect(
205 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
206 /* byval */ true);
207 }
208
209 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
211}
212
213void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
214 if (!getCXXABI().classifyReturnType(FI))
216
217 for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
218 I.info = ArgumentsCount < FI.getNumRequiredArgs()
219 ? classifyArgumentType(I.type)
220 : ABIArgInfo::getDirect();
221
222 // Always honor user-specified calling convention.
223 if (FI.getCallingConvention() != llvm::CallingConv::C)
224 return;
225
226 FI.setEffectiveCallingConvention(getRuntimeCC());
227}
228
229RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
230 QualType Ty, AggValueSlot Slot) const {
231 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
232 getContext().getTypeInfoInChars(Ty),
234 /*AllowHigherAlign=*/true, Slot);
235}
236
237void NVPTXTargetCodeGenInfo::setTargetAttributes(
238 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
239 if (GV->isDeclaration())
240 return;
241 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
242 if (VD) {
243 if (M.getLangOpts().CUDA) {
245 addNVVMMetadata(GV, "surface", 1);
246 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
247 addNVVMMetadata(GV, "texture", 1);
248 return;
249 }
250 }
251
252 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
253 if (!FD)
254 return;
255
256 llvm::Function *F = cast<llvm::Function>(GV);
257
258 // Perform special handling in OpenCL/CUDA mode
259 if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
260 // Use function attributes to check for kernel functions
261 // By default, all functions are device functions
262 if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
263 // OpenCL/CUDA kernel functions get kernel metadata
264 // And kernel functions are not subject to inlining
265 F->addFnAttr(llvm::Attribute::NoInline);
266 if (FD->hasAttr<CUDAGlobalAttr>()) {
267 F->setCallingConv(llvm::CallingConv::PTX_Kernel);
268
269 for (auto IV : llvm::enumerate(FD->parameters()))
270 if (IV.value()->hasAttr<CUDAGridConstantAttr>())
271 F->addParamAttr(
272 IV.index(),
273 llvm::Attribute::get(F->getContext(), "nvvm.grid_constant"));
274 }
275 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
277 }
278 }
279 // Attach kernel metadata directly if compiling for NVPTX.
280 if (FD->hasAttr<DeviceKernelAttr>())
281 F->setCallingConv(llvm::CallingConv::PTX_Kernel);
282}
283
284void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
285 StringRef Name, int Operand) {
286 llvm::Module *M = GV->getParent();
287 llvm::LLVMContext &Ctx = M->getContext();
288
289 // Get "nvvm.annotations" metadata node
290 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
291
292 SmallVector<llvm::Metadata *, 5> MDVals = {
293 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
294 llvm::ConstantAsMetadata::get(
295 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
296
297 // Append metadata to nvvm.annotations
298 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
299}
300
301bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
302 return false;
303}
304
305llvm::Constant *
306NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
307 llvm::PointerType *PT,
308 QualType QT) const {
309 auto &Ctx = CGM.getContext();
310 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
311 return llvm::ConstantPointerNull::get(PT);
312
313 auto NPT = llvm::PointerType::get(
314 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
315 return llvm::ConstantExpr::getAddrSpaceCast(
316 llvm::ConstantPointerNull::get(NPT), PT);
317}
318} // namespace
319
321 const CUDALaunchBoundsAttr *Attr,
322 int32_t *MaxThreadsVal,
323 int32_t *MinBlocksVal,
324 int32_t *MaxClusterRankVal) {
325 llvm::APSInt MaxThreads(32);
326 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
327 if (MaxThreads > 0) {
328 if (MaxThreadsVal)
329 *MaxThreadsVal = MaxThreads.getExtValue();
330 if (F)
331 F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
332 }
333
334 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
335 // was not specified in __launch_bounds__ or if the user specified a 0 value,
336 // we don't have to add a PTX directive.
337 if (Attr->getMinBlocks()) {
338 llvm::APSInt MinBlocks(32);
339 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
340 if (MinBlocks > 0) {
341 if (MinBlocksVal)
342 *MinBlocksVal = MinBlocks.getExtValue();
343 if (F)
344 F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
345 }
346 }
347 if (Attr->getMaxBlocks()) {
348 llvm::APSInt MaxBlocks(32);
349 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
350 if (MaxBlocks > 0) {
351 if (MaxClusterRankVal)
352 *MaxClusterRankVal = MaxBlocks.getExtValue();
353 if (F)
354 F->addFnAttr("nvvm.maxclusterrank",
355 llvm::utostr(MaxBlocks.getExtValue()));
356 }
357 }
358}
359
360std::unique_ptr<TargetCodeGenInfo>
362 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
363}
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:856
Attr - This represents one attribute.
Definition Attr.h:44
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getExtend(QualType Ty, llvm::Type *T=nullptr)
ABIInfo - Target specific hooks for defining how a type should be passed or returned from functions.
Definition ABIInfo.h:48
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into a...
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
void setEffectiveCallingConvention(unsigned Value)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition NVPTX.cpp:320
const LangOptions & getLangOpts() const
ASTContext & getContext() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues,...
Definition TargetInfo.h:47
T * getAttr() const
Definition DeclBase.h:573
bool hasAttr() const
Definition DeclBase.h:577
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2771
A (possibly-)qualified type.
Definition TypeBase.h:937
virtual bool hasInt128Type() const
Determine whether the __int128 type is supported on this target.
Definition TargetInfo.h:673
virtual bool hasFloat16Type() const
Determine whether the _Float16 type is supported on this target.
Definition TargetInfo.h:715
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
Definition TargetInfo.h:712
bool isVoidType() const
Definition TypeBase.h:8878
bool isFloat16Type() const
Definition TypeBase.h:8887
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8922
bool isScalarType() const
Definition TypeBase.h:8980
bool isFloat128Type() const
Definition TypeBase.h:8907
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
Definition Type.cpp:5334
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Definition Type.cpp:5343
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9151
EnumDecl * getAsEnumDecl() const
Retrieves the EnumDecl this type refers to.
Definition Type.h:53
bool isRealFloatingType() const
Floating point categories.
Definition Type.cpp:2320
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9098
QualType getType() const
Definition Decl.h:722
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:145
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createNVPTXTargetCodeGenInfo(CodeGenModule &CGM)
Definition NVPTX.cpp:361
RValue emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign, AggValueSlot Slot, bool ForceRightAdjust=false)
Emit va_arg for a platform using the common void* representation, where arguments are simply emitted ...
bool isAggregateTypeForABI(QualType T)
bool Div(InterpState &S, CodePtr OpPC)
1) Pops the RHS from the stack.
Definition Interp.h:692
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
U cast(CodeGen::Address addr)
Definition Address.h:327
unsigned long uint64_t