xref: /freebsd/contrib/llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision 5036d9652a5701d00e9e40ea942c278e9f77d33d)
1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements offloading to CUDA devices.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "DeviceOffload.h"
14 
15 #include "clang/Basic/TargetOptions.h"
16 #include "clang/CodeGen/ModuleBuilder.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/MC/TargetRegistry.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 namespace clang {
25 
26 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
27     Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
28     IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
29     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
30     llvm::Error &Err)
31     : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
32       HostParser(HostParser), VFS(FS) {
33   if (Err)
34     return;
35   StringRef Arch = CI->getTargetOpts().CPU;
36   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
37     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
38                                                "Invalid CUDA architecture",
39                                                llvm::inconvertibleErrorCode()));
40     return;
41   }
42 }
43 
44 llvm::Expected<PartialTranslationUnit &>
45 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
46   auto PTU = IncrementalParser::Parse(Input);
47   if (!PTU)
48     return PTU.takeError();
49 
50   auto PTX = GeneratePTX();
51   if (!PTX)
52     return PTX.takeError();
53 
54   auto Err = GenerateFatbinary();
55   if (Err)
56     return std::move(Err);
57 
58   std::string FatbinFileName =
59       "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
60   VFS->addFile(FatbinFileName, 0,
61                llvm::MemoryBuffer::getMemBuffer(
62                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
63                    "", false));
64 
65   HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
66 
67   FatbinContent.clear();
68 
69   return PTU;
70 }
71 
72 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
73   auto &PTU = PTUs.back();
74   std::string Error;
75 
76   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
77       PTU.TheModule->getTargetTriple(), Error);
78   if (!Target)
79     return llvm::make_error<llvm::StringError>(std::move(Error),
80                                                std::error_code());
81   llvm::TargetOptions TO = llvm::TargetOptions();
82   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
83       PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
84       llvm::Reloc::Model::PIC_);
85   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
86 
87   PTXCode.clear();
88   llvm::raw_svector_ostream dest(PTXCode);
89 
90   llvm::legacy::PassManager PM;
91   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
92                                          llvm::CodeGenFileType::AssemblyFile)) {
93     return llvm::make_error<llvm::StringError>(
94         "NVPTX backend cannot produce PTX code.",
95         llvm::inconvertibleErrorCode());
96   }
97 
98   if (!PM.run(*PTU.TheModule))
99     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
100                                                llvm::inconvertibleErrorCode());
101 
102   PTXCode += '\0';
103   while (PTXCode.size() % 8)
104     PTXCode += '\0';
105   return PTXCode.str();
106 }
107 
108 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
109   enum FatBinFlags {
110     AddressSize64 = 0x01,
111     HasDebugInfo = 0x02,
112     ProducerCuda = 0x04,
113     HostLinux = 0x10,
114     HostMac = 0x20,
115     HostWindows = 0x40
116   };
117 
118   struct FatBinInnerHeader {
119     uint16_t Kind;             // 0x00
120     uint16_t unknown02;        // 0x02
121     uint32_t HeaderSize;       // 0x04
122     uint32_t DataSize;         // 0x08
123     uint32_t unknown0c;        // 0x0c
124     uint32_t CompressedSize;   // 0x10
125     uint32_t SubHeaderSize;    // 0x14
126     uint16_t VersionMinor;     // 0x18
127     uint16_t VersionMajor;     // 0x1a
128     uint32_t CudaArch;         // 0x1c
129     uint32_t unknown20;        // 0x20
130     uint32_t unknown24;        // 0x24
131     uint32_t Flags;            // 0x28
132     uint32_t unknown2c;        // 0x2c
133     uint32_t unknown30;        // 0x30
134     uint32_t unknown34;        // 0x34
135     uint32_t UncompressedSize; // 0x38
136     uint32_t unknown3c;        // 0x3c
137     uint32_t unknown40;        // 0x40
138     uint32_t unknown44;        // 0x44
139     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
140         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
141           DataSize(DataSize), unknown0c(0), CompressedSize(0),
142           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
143           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
144           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
145           unknown3c(0), unknown40(0), unknown44(0) {}
146   };
147 
148   struct FatBinHeader {
149     uint32_t Magic;      // 0x00
150     uint16_t Version;    // 0x04
151     uint16_t HeaderSize; // 0x06
152     uint32_t DataSize;   // 0x08
153     uint32_t unknown0c;  // 0x0c
154   public:
155     FatBinHeader(uint32_t DataSize)
156         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
157           DataSize(DataSize), unknown0c(0) {}
158   };
159 
160   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
161   FatbinContent.append((char *)&OuterHeader,
162                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
163 
164   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
165                                 FatBinFlags::AddressSize64 |
166                                     FatBinFlags::HostLinux);
167   FatbinContent.append((char *)&InnerHeader,
168                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
169 
170   FatbinContent.append(PTXCode.begin(), PTXCode.end());
171 
172   return llvm::Error::success();
173 }
174 
175 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
176 
177 } // namespace clang
178