clang 22.0.0git
DeviceOffload.cpp
Go to the documentation of this file.
1//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements offloading to CUDA devices.
10//
11//===----------------------------------------------------------------------===//
12
13#include "DeviceOffload.h"
14
19
20#include "llvm/IR/LegacyPassManager.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/TargetRegistry.h"
23#include "llvm/Target/TargetMachine.h"
24
25namespace clang {
26
28 CompilerInstance &DeviceInstance, CompilerInstance &HostInstance,
29 IncrementalAction *DeviceAct,
31 llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
32 : IncrementalParser(DeviceInstance, DeviceAct, Err, PTUs), VFS(FS),
33 CodeGenOpts(HostInstance.getCodeGenOpts()),
34 TargetOpts(DeviceInstance.getTargetOpts()) {
35 if (Err)
36 return;
37 StringRef Arch = TargetOpts.CPU;
38 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
39 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
40 "Invalid CUDA architecture",
41 llvm::inconvertibleErrorCode()));
42 return;
43 }
44}
45
47 auto &PTU = PTUs.back();
48 std::string Error;
49
50 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
51 PTU.TheModule->getTargetTriple(), Error);
52 if (!Target)
53 return llvm::make_error<llvm::StringError>(std::move(Error),
54 std::error_code());
55 llvm::TargetOptions TO = llvm::TargetOptions();
56 llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
57 PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
58 llvm::Reloc::Model::PIC_);
59 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
60
61 PTXCode.clear();
62 llvm::raw_svector_ostream dest(PTXCode);
63
64 llvm::legacy::PassManager PM;
65 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
66 llvm::CodeGenFileType::AssemblyFile)) {
67 return llvm::make_error<llvm::StringError>(
68 "NVPTX backend cannot produce PTX code.",
69 llvm::inconvertibleErrorCode());
70 }
71
72 if (!PM.run(*PTU.TheModule))
73 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
74 llvm::inconvertibleErrorCode());
75
76 PTXCode += '\0';
77 while (PTXCode.size() % 8)
78 PTXCode += '\0';
79 return PTXCode.str();
80}
81
83 enum FatBinFlags {
84 AddressSize64 = 0x01,
85 HasDebugInfo = 0x02,
86 ProducerCuda = 0x04,
87 HostLinux = 0x10,
88 HostMac = 0x20,
89 HostWindows = 0x40
90 };
91
92 struct FatBinInnerHeader {
93 uint16_t Kind; // 0x00
94 uint16_t unknown02; // 0x02
95 uint32_t HeaderSize; // 0x04
96 uint32_t DataSize; // 0x08
97 uint32_t unknown0c; // 0x0c
98 uint32_t CompressedSize; // 0x10
99 uint32_t SubHeaderSize; // 0x14
100 uint16_t VersionMinor; // 0x18
101 uint16_t VersionMajor; // 0x1a
102 uint32_t CudaArch; // 0x1c
103 uint32_t unknown20; // 0x20
104 uint32_t unknown24; // 0x24
105 uint32_t Flags; // 0x28
106 uint32_t unknown2c; // 0x2c
107 uint32_t unknown30; // 0x30
108 uint32_t unknown34; // 0x34
109 uint32_t UncompressedSize; // 0x38
110 uint32_t unknown3c; // 0x3c
111 uint32_t unknown40; // 0x40
112 uint32_t unknown44; // 0x44
113 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
114 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
115 DataSize(DataSize), unknown0c(0), CompressedSize(0),
116 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
117 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
118 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
119 unknown3c(0), unknown40(0), unknown44(0) {}
120 };
121
122 struct FatBinHeader {
123 uint32_t Magic; // 0x00
124 uint16_t Version; // 0x04
125 uint16_t HeaderSize; // 0x06
126 uint32_t DataSize; // 0x08
127 uint32_t unknown0c; // 0x0c
128 public:
129 FatBinHeader(uint32_t DataSize)
130 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
131 DataSize(DataSize), unknown0c(0) {}
132 };
133
134 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
135 FatbinContent.append((char *)&OuterHeader,
136 ((char *)&OuterHeader) + OuterHeader.HeaderSize);
137
138 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
139 FatBinFlags::AddressSize64 |
140 FatBinFlags::HostLinux);
141 FatbinContent.append((char *)&InnerHeader,
142 ((char *)&InnerHeader) + InnerHeader.HeaderSize);
143
144 FatbinContent.append(PTXCode.begin(), PTXCode.end());
145
146 const PartialTranslationUnit &PTU = PTUs.back();
147
148 std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
149
150 VFS->addFile(FatbinFileName, 0,
151 llvm::MemoryBuffer::getMemBuffer(
152 llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
153 "", false));
154
155 CodeGenOpts.CudaGpuBinaryFileName = std::move(FatbinFileName);
156
157 FatbinContent.clear();
158
159 return llvm::Error::success();
160}
161
163
164} // namespace clang
Defines the clang::TargetOptions class.
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
A custom action enabling the incremental processing functionality.
llvm::SmallVector< char, 1024 > FatbinContent
llvm::SmallString< 1024 > PTXCode
IncrementalCUDADeviceParser(CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, IncrementalAction *DeviceAct, llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS, llvm::Error &Err, std::list< PartialTranslationUnit > &PTUs)
llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS
llvm::Expected< llvm::StringRef > GeneratePTX()
const TargetOptions & TargetOpts
IncrementalParser(CompilerInstance &Instance, IncrementalAction *Act, llvm::Error &Err, std::list< PartialTranslationUnit > &PTUs)
std::list< PartialTranslationUnit > & PTUs
The JSON file list parser is used to communicate input to InstallAPI.
The class keeps track of various objects created as part of processing incremental inputs.
std::unique_ptr< llvm::Module > TheModule
The llvm IR produced for the input.