31 llvm::Error &Err, std::list<PartialTranslationUnit> &
PTUs)
39 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
40 "Invalid CUDA architecture",
41 llvm::inconvertibleErrorCode()));
47 auto &PTU =
PTUs.back();
50 const llvm::Target *
Target = llvm::TargetRegistry::lookupTarget(
51 PTU.TheModule->getTargetTriple(),
Error);
53 return llvm::make_error<llvm::StringError>(std::move(
Error),
55 llvm::TargetOptions TO = llvm::TargetOptions();
56 llvm::TargetMachine *TargetMachine =
Target->createTargetMachine(
57 PTU.TheModule->getTargetTriple(),
TargetOpts.CPU,
"", TO,
58 llvm::Reloc::Model::PIC_);
59 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
62 llvm::raw_svector_ostream dest(
PTXCode);
64 llvm::legacy::PassManager PM;
65 if (TargetMachine->addPassesToEmitFile(PM, dest,
nullptr,
66 llvm::CodeGenFileType::AssemblyFile)) {
67 return llvm::make_error<llvm::StringError>(
68 "NVPTX backend cannot produce PTX code.",
69 llvm::inconvertibleErrorCode());
72 if (!PM.run(*PTU.TheModule))
73 return llvm::make_error<llvm::StringError>(
"Failed to emit PTX code.",
74 llvm::inconvertibleErrorCode());
92 struct FatBinInnerHeader {
98 uint32_t CompressedSize;
99 uint32_t SubHeaderSize;
100 uint16_t VersionMinor;
101 uint16_t VersionMajor;
109 uint32_t UncompressedSize;
// Builds an inner fatbinary header record.
//
// Parameters:
//   DataSize - stored unchanged in the DataSize member.
//   CudaArch - stored unchanged in the CudaArch member.
//   Flags    - stored unchanged in the Flags member.
//
// Every other field gets a fixed value: Kind = 1, unknown02 = 0x0101,
// version 4.2 (VersionMajor = 4, VersionMinor = 2), CompressedSize and
// UncompressedSize = 0, and all remaining unknownXX fields = 0.
// NOTE(review): the meaning of these constants is not established by the
// visible code; presumably they mirror the on-disk fatbin layout — confirm
// against the format description before changing any of them.
113 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
// HeaderSize records the size of this header object itself.
114 : Kind(1 ), unknown02(0x0101), HeaderSize(
sizeof(*
this)),
115 DataSize(DataSize), unknown0c(0), CompressedSize(0),
// SubHeaderSize is computed from the HeaderSize *member* (there is no
// parameter of that name), so HeaderSize must already be initialized here.
// Members are initialized in declaration order, not initializer-list order.
// NOTE(review): HeaderSize's declaration is not visible in this excerpt —
// confirm it is declared before SubHeaderSize.
116 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
117 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
118 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
119 unknown3c(0), unknown40(0), unknown44(0) {}
122 struct FatBinHeader {
// Builds the outer fatbinary header.
//
// Parameters:
//   DataSize - stored unchanged in the DataSize member. The call site visible
//              in this file passes sizeof(FatBinInnerHeader) + PTXCode.size().
//
// Fixed fields: Magic = 0xba55ed50, Version = 1, unknown0c = 0.
// HeaderSize records the size of this header object itself; the visible
// caller uses it to find the end of the header bytes.
// NOTE(review): single-argument constructor is not marked explicit, so a
// uint32_t converts implicitly to FatBinHeader — confirm that is intended.
129 FatBinHeader(uint32_t DataSize)
130 : Magic(0xba55ed50), Version(1), HeaderSize(
sizeof(*
this)),
131 DataSize(DataSize), unknown0c(0) {}
134 FatBinHeader OuterHeader(
sizeof(FatBinInnerHeader) +
PTXCode.size());
136 ((
char *)&OuterHeader) + OuterHeader.HeaderSize);
139 FatBinFlags::AddressSize64 |
140 FatBinFlags::HostLinux);
142 ((
char *)&InnerHeader) + InnerHeader.HeaderSize);
148 std::string FatbinFileName =
"/" + PTU.
TheModule->getName().str() +
".fatbin";
150 VFS->addFile(FatbinFileName, 0,
151 llvm::MemoryBuffer::getMemBuffer(
155 CodeGenOpts.CudaGpuBinaryFileName = std::move(FatbinFileName);
159 return llvm::Error::success();
IncrementalCUDADeviceParser(CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, IncrementalAction *DeviceAct, llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS, llvm::Error &Err, std::list< PartialTranslationUnit > &PTUs)